From b51965dddbed68f23c5e8c169c23c794313ce5f6 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 28 Jun 2011 17:17:23 +0200 Subject: Add boost subset as an implementation detail --- cutl/details/boost/regex/config.hpp | 430 +++ cutl/details/boost/regex/config/cwchar.hpp | 207 ++ cutl/details/boost/regex/icu.hpp | 1029 +++++++ cutl/details/boost/regex/pattern_except.hpp | 100 + cutl/details/boost/regex/pending/object_cache.hpp | 165 ++ cutl/details/boost/regex/pending/static_mutex.hpp | 180 ++ .../boost/regex/pending/unicode_iterator.hpp | 692 +++++ cutl/details/boost/regex/regex_traits.hpp | 35 + cutl/details/boost/regex/src/c_regex_traits.cxx | 212 ++ cutl/details/boost/regex/src/cpp_regex_traits.cxx | 117 + cutl/details/boost/regex/src/cregex.cxx | 647 +++++ cutl/details/boost/regex/src/fileiter.cxx | 919 +++++++ cutl/details/boost/regex/src/icu.cxx | 507 ++++ cutl/details/boost/regex/src/instances.cxx | 32 + cutl/details/boost/regex/src/posix_api.cxx | 289 ++ cutl/details/boost/regex/src/regex.cxx | 226 ++ cutl/details/boost/regex/src/regex_debug.cxx | 59 + cutl/details/boost/regex/src/regex_raw_buffer.cxx | 70 + .../boost/regex/src/regex_traits_defaults.cxx | 692 +++++ cutl/details/boost/regex/src/static_mutex.cxx | 179 ++ cutl/details/boost/regex/src/usinstances.cxx | 81 + cutl/details/boost/regex/src/w32_regex_traits.cxx | 650 +++++ cutl/details/boost/regex/src/wc_regex_traits.cxx | 320 +++ cutl/details/boost/regex/src/wide_posix_api.cxx | 315 +++ cutl/details/boost/regex/src/winstances.cxx | 35 + cutl/details/boost/regex/user.hpp | 90 + cutl/details/boost/regex/v4/basic_regex.hpp | 779 ++++++ .../details/boost/regex/v4/basic_regex_creator.hpp | 1560 +++++++++++ cutl/details/boost/regex/v4/basic_regex_parser.hpp | 2817 ++++++++++++++++++++ cutl/details/boost/regex/v4/c_regex_traits.hpp | 211 ++ cutl/details/boost/regex/v4/char_regex_traits.hpp | 81 + cutl/details/boost/regex/v4/cpp_regex_traits.hpp | 1088 ++++++++ cutl/details/boost/regex/v4/cregex.hpp | 330 +++ cutl/details/boost/regex/v4/error_type.hpp | 59 + cutl/details/boost/regex/v4/fileiter.hpp | 455 ++++ cutl/details/boost/regex/v4/instances.hpp | 219 ++ cutl/details/boost/regex/v4/iterator_category.hpp | 91 + cutl/details/boost/regex/v4/iterator_traits.hpp | 135 + cutl/details/boost/regex/v4/match_flags.hpp | 138 + cutl/details/boost/regex/v4/match_results.hpp | 699 +++++ cutl/details/boost/regex/v4/mem_block_cache.hpp | 99 + cutl/details/boost/regex/v4/perl_matcher.hpp | 584 ++++ .../details/boost/regex/v4/perl_matcher_common.hpp | 990 +++++++ .../boost/regex/v4/perl_matcher_non_recursive.hpp | 1639 ++++++++++++ .../boost/regex/v4/perl_matcher_recursive.hpp | 991 +++++++ cutl/details/boost/regex/v4/primary_transform.hpp | 146 + cutl/details/boost/regex/v4/protected_call.hpp | 81 + cutl/details/boost/regex/v4/regbase.hpp | 180 ++ cutl/details/boost/regex/v4/regex.hpp | 202 ++ cutl/details/boost/regex/v4/regex_format.hpp | 1141 ++++++++ cutl/details/boost/regex/v4/regex_fwd.hpp | 73 + cutl/details/boost/regex/v4/regex_grep.hpp | 155 ++ cutl/details/boost/regex/v4/regex_iterator.hpp | 201 ++ cutl/details/boost/regex/v4/regex_match.hpp | 382 +++ cutl/details/boost/regex/v4/regex_merge.hpp | 93 + cutl/details/boost/regex/v4/regex_raw_buffer.hpp | 210 ++ cutl/details/boost/regex/v4/regex_replace.hpp | 99 + cutl/details/boost/regex/v4/regex_search.hpp | 217 ++ cutl/details/boost/regex/v4/regex_split.hpp | 172 ++ .../boost/regex/v4/regex_token_iterator.hpp | 342 +++ cutl/details/boost/regex/v4/regex_traits.hpp | 189 ++ .../boost/regex/v4/regex_traits_defaults.hpp | 371 +++ cutl/details/boost/regex/v4/regex_workaround.hpp | 232 ++ cutl/details/boost/regex/v4/states.hpp | 301 +++ cutl/details/boost/regex/v4/sub_match.hpp | 512 ++++ cutl/details/boost/regex/v4/syntax_type.hpp | 105 + cutl/details/boost/regex/v4/u32regex_iterator.hpp | 193 ++ .../boost/regex/v4/u32regex_token_iterator.hpp | 377 +++ cutl/details/boost/regex/v4/w32_regex_traits.hpp | 741 +++++ 69 files changed, 27958 insertions(+) create mode 100644 cutl/details/boost/regex/config.hpp create mode 100644 cutl/details/boost/regex/config/cwchar.hpp create mode 100644 cutl/details/boost/regex/icu.hpp create mode 100644 cutl/details/boost/regex/pattern_except.hpp create mode 100644 cutl/details/boost/regex/pending/object_cache.hpp create mode 100644 cutl/details/boost/regex/pending/static_mutex.hpp create mode 100644 cutl/details/boost/regex/pending/unicode_iterator.hpp create mode 100644 cutl/details/boost/regex/regex_traits.hpp create mode 100644 cutl/details/boost/regex/src/c_regex_traits.cxx create mode 100644 cutl/details/boost/regex/src/cpp_regex_traits.cxx create mode 100644 cutl/details/boost/regex/src/cregex.cxx create mode 100644 cutl/details/boost/regex/src/fileiter.cxx create mode 100644 cutl/details/boost/regex/src/icu.cxx create mode 100644 cutl/details/boost/regex/src/instances.cxx create mode 100644 cutl/details/boost/regex/src/posix_api.cxx create mode 100644 cutl/details/boost/regex/src/regex.cxx create mode 100644 cutl/details/boost/regex/src/regex_debug.cxx create mode 100644 cutl/details/boost/regex/src/regex_raw_buffer.cxx create mode 100644 cutl/details/boost/regex/src/regex_traits_defaults.cxx create mode 100644 cutl/details/boost/regex/src/static_mutex.cxx create mode 100644 cutl/details/boost/regex/src/usinstances.cxx create mode 100644 cutl/details/boost/regex/src/w32_regex_traits.cxx create mode 100644 cutl/details/boost/regex/src/wc_regex_traits.cxx create mode 100644 cutl/details/boost/regex/src/wide_posix_api.cxx create mode 100644 cutl/details/boost/regex/src/winstances.cxx create mode 100644 cutl/details/boost/regex/user.hpp create mode 100644 cutl/details/boost/regex/v4/basic_regex.hpp create mode 100644 cutl/details/boost/regex/v4/basic_regex_creator.hpp create mode 100644 cutl/details/boost/regex/v4/basic_regex_parser.hpp create mode 100644 cutl/details/boost/regex/v4/c_regex_traits.hpp create mode 100644 cutl/details/boost/regex/v4/char_regex_traits.hpp create mode 100644 cutl/details/boost/regex/v4/cpp_regex_traits.hpp create mode 100644 cutl/details/boost/regex/v4/cregex.hpp create mode 100644 cutl/details/boost/regex/v4/error_type.hpp create mode 100644 cutl/details/boost/regex/v4/fileiter.hpp create mode 100644 cutl/details/boost/regex/v4/instances.hpp create mode 100644 cutl/details/boost/regex/v4/iterator_category.hpp create mode 100644 cutl/details/boost/regex/v4/iterator_traits.hpp create mode 100644 cutl/details/boost/regex/v4/match_flags.hpp create mode 100644 cutl/details/boost/regex/v4/match_results.hpp create mode 100644 cutl/details/boost/regex/v4/mem_block_cache.hpp create mode 100644 cutl/details/boost/regex/v4/perl_matcher.hpp create mode 100644 cutl/details/boost/regex/v4/perl_matcher_common.hpp create mode 100644 cutl/details/boost/regex/v4/perl_matcher_non_recursive.hpp create mode 100644 cutl/details/boost/regex/v4/perl_matcher_recursive.hpp create mode 100644 cutl/details/boost/regex/v4/primary_transform.hpp create mode 100644 cutl/details/boost/regex/v4/protected_call.hpp create mode 100644 cutl/details/boost/regex/v4/regbase.hpp create mode 100644 cutl/details/boost/regex/v4/regex.hpp create mode 100644 cutl/details/boost/regex/v4/regex_format.hpp create mode 100644 cutl/details/boost/regex/v4/regex_fwd.hpp create mode 100644 cutl/details/boost/regex/v4/regex_grep.hpp create mode 100644 cutl/details/boost/regex/v4/regex_iterator.hpp create mode 100644 cutl/details/boost/regex/v4/regex_match.hpp create mode 100644 cutl/details/boost/regex/v4/regex_merge.hpp create mode 100644 cutl/details/boost/regex/v4/regex_raw_buffer.hpp create mode 100644 cutl/details/boost/regex/v4/regex_replace.hpp create mode 100644 cutl/details/boost/regex/v4/regex_search.hpp create mode 100644 cutl/details/boost/regex/v4/regex_split.hpp create mode 100644 cutl/details/boost/regex/v4/regex_token_iterator.hpp create mode 100644 cutl/details/boost/regex/v4/regex_traits.hpp create mode 100644 cutl/details/boost/regex/v4/regex_traits_defaults.hpp create mode 100644 cutl/details/boost/regex/v4/regex_workaround.hpp create mode 100644 cutl/details/boost/regex/v4/states.hpp create mode 100644 cutl/details/boost/regex/v4/sub_match.hpp create mode 100644 cutl/details/boost/regex/v4/syntax_type.hpp create mode 100644 cutl/details/boost/regex/v4/u32regex_iterator.hpp create mode 100644 cutl/details/boost/regex/v4/u32regex_token_iterator.hpp create mode 100644 cutl/details/boost/regex/v4/w32_regex_traits.hpp (limited to 'cutl/details/boost/regex') diff --git a/cutl/details/boost/regex/config.hpp b/cutl/details/boost/regex/config.hpp new file mode 100644 index 0000000..dd28408 --- /dev/null +++ b/cutl/details/boost/regex/config.hpp @@ -0,0 +1,430 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE config.hpp + * VERSION see + * DESCRIPTION: regex extended config setup. + */ + +#ifndef BOOST_REGEX_CONFIG_HPP +#define BOOST_REGEX_CONFIG_HPP +/* + * Borland C++ Fix/error check + * this has to go *before* we include any std lib headers: + */ +#if defined(__BORLANDC__) +# include +#endif + +/***************************************************************************** + * + * Include all the headers we need here: + * + ****************************************************************************/ + +#ifdef __cplusplus + +# ifndef BOOST_REGEX_USER_CONFIG +# define BOOST_REGEX_USER_CONFIG +# endif + +# include BOOST_REGEX_USER_CONFIG + +# include + +#else + /* + * C build, + * don't include because that may + * do C++ specific things in future... + */ +# include +# include +# ifdef _MSC_VER +# define BOOST_MSVC _MSC_VER +# endif +#endif + +/***************************************************************************** + * + * Boilerplate regex config options: + * + ****************************************************************************/ + +/* Obsolete macro, use BOOST_VERSION instead: */ +#define BOOST_RE_VERSION 320 + +/* fix: */ +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +/* + * Fix for gcc prior to 3.4: std::ctype doesn't allow + * masks to be combined, for example: + * std::use_facet >.is(std::ctype_base::lower|std::ctype_base::upper, L'a'); + * returns *false*. + */ +#ifdef __GLIBCPP__ +# define BOOST_REGEX_BUGGY_CTYPE_FACET +#endif + +/* + * Intel C++ before 8.0 ends up with unresolved externals unless we turn off + * extern template support: + */ +#if defined(BOOST_INTEL) && defined(__cplusplus) && (BOOST_INTEL <= 800) +# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES +#endif +/* + * Visual C++ doesn't support external templates with C++ extensions turned off: + */ +#if defined(_MSC_VER) && !defined(_MSC_EXTENSIONS) +# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES +#endif +/* + * Shared regex lib will crash without this, frankly it looks a lot like a gcc bug: + */ +#if defined(__MINGW32__) +# define BOOST_REGEX_NO_EXTERNAL_TEMPLATES +#endif + +/* + * If there isn't good enough wide character support then there will + * be no wide character regular expressions: + */ +#if (defined(BOOST_NO_CWCHAR) || defined(BOOST_NO_CWCTYPE) || defined(BOOST_NO_STD_WSTRING)) +# if !defined(BOOST_NO_WREGEX) +# define BOOST_NO_WREGEX +# endif +#else +# if defined(__sgi) && (defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) + /* STLPort on IRIX is misconfigured: does not compile + * as a temporary fix include instead and prevent inclusion + * of STLPort version of */ +# include +# define __STLPORT_CWCTYPE +# define _STLP_CWCTYPE +# endif + +#ifdef __cplusplus +# include +#endif + +#endif + +/* + * If Win32 support has been disabled for boost in general, then + * it is for regex in particular: + */ +#if defined(BOOST_DISABLE_WIN32) && !defined(BOOST_REGEX_NO_W32) +# define BOOST_REGEX_NO_W32 +#endif + +/* disable our own file-iterators and mapfiles if we can't + * support them: */ +#if !defined(BOOST_HAS_DIRENT_H) && !(defined(_WIN32) && !defined(BOOST_REGEX_NO_W32)) +# define BOOST_REGEX_NO_FILEITER +#endif + +/* backwards compatibitity: */ +#if defined(BOOST_RE_NO_LIB) +# define BOOST_REGEX_NO_LIB +#endif + +#if defined(__GNUC__) && (defined(_WIN32) || defined(__CYGWIN__)) +/* gcc on win32 has problems if you include + (sporadically generates bad code). */ +# define BOOST_REGEX_NO_W32 +#endif +#if defined(__COMO__) && !defined(BOOST_REGEX_NO_W32) && !defined(_MSC_EXTENSIONS) +# define BOOST_REGEX_NO_W32 +#endif + +/***************************************************************************** + * + * Wide character workarounds: + * + ****************************************************************************/ + +/* + * define BOOST_REGEX_HAS_OTHER_WCHAR_T when wchar_t is a native type, but the users + * code may be built with wchar_t as unsigned short: basically when we're building + * with MSVC and the /Zc:wchar_t option we place some extra unsigned short versions + * of the non-inline functions in the library, so that users can still link to the lib, + * irrespective of whether their own code is built with /Zc:wchar_t. + */ +#if defined(__cplusplus) && (defined(BOOST_MSVC) || defined(__ICL)) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) && defined(BOOST_WINDOWS) && !defined(__SGI_STL_PORT) && !defined(_STLPORT_VERSION) && !defined(BOOST_RWSTD_VER) +# define BOOST_REGEX_HAS_OTHER_WCHAR_T +# ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable : 4251 4231 4660) +# endif +# if defined(_DLL) && defined(BOOST_MSVC) && (BOOST_MSVC < 1600) +# include + extern template class __declspec(dllimport) std::basic_string; +# endif +# ifdef BOOST_MSVC +# pragma warning(pop) +# endif +#endif + + +/***************************************************************************** + * + * Set up dll import/export options: + * + ****************************************************************************/ + +#ifndef BOOST_SYMBOL_EXPORT +# define BOOST_SYMBOL_EXPORT +# define BOOST_SYMBOL_IMPORT +#endif + +#if (defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK)) && !defined(BOOST_REGEX_STATIC_LINK) +# if defined(BOOST_REGEX_SOURCE) +# define BOOST_REGEX_DECL BOOST_SYMBOL_EXPORT +# define BOOST_REGEX_BUILD_DLL +# else +# define BOOST_REGEX_DECL BOOST_SYMBOL_IMPORT +# endif +#else +# define BOOST_REGEX_DECL +#endif + +#if !defined(BOOST_REGEX_NO_LIB) && !defined(BOOST_REGEX_SOURCE) && !defined(BOOST_ALL_NO_LIB) && defined(__cplusplus) +# define BOOST_LIB_NAME cutl_details_boost_regex +# if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK) +# define BOOST_DYN_LINK +# endif +# ifdef BOOST_REGEX_DIAG +# define BOOST_LIB_DIAGNOSTIC +# endif +# include +#endif + +/***************************************************************************** + * + * Set up function call type: + * + ****************************************************************************/ + +#if defined(BOOST_MSVC) && (BOOST_MSVC >= 1200) && defined(_MSC_EXTENSIONS) +#if defined(_DEBUG) || defined(__MSVC_RUNTIME_CHECKS) || defined(_MANAGED) +# define BOOST_REGEX_CALL __cdecl +#else +# define BOOST_REGEX_CALL __fastcall +#endif +# define BOOST_REGEX_CCALL __cdecl +#endif + +#if defined(__BORLANDC__) && !defined(BOOST_DISABLE_WIN32) +# define BOOST_REGEX_CALL __fastcall +# define BOOST_REGEX_CCALL __stdcall +#endif + +#ifndef BOOST_REGEX_CALL +# define BOOST_REGEX_CALL +#endif +#ifndef BOOST_REGEX_CCALL +#define BOOST_REGEX_CCALL +#endif + +/***************************************************************************** + * + * Set up localisation model: + * + ****************************************************************************/ + +/* backwards compatibility: */ +#ifdef BOOST_RE_LOCALE_C +# define BOOST_REGEX_USE_C_LOCALE +#endif + +#ifdef BOOST_RE_LOCALE_CPP +# define BOOST_REGEX_USE_CPP_LOCALE +#endif + +#if defined(__CYGWIN__) +# define BOOST_REGEX_USE_C_LOCALE +#endif + +/* Win32 defaults to native Win32 locale: */ +#if defined(_WIN32) && !defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(BOOST_REGEX_USE_C_LOCALE) && !defined(BOOST_REGEX_USE_CPP_LOCALE) && !defined(BOOST_REGEX_NO_W32) +# define BOOST_REGEX_USE_WIN32_LOCALE +#endif +/* otherwise use C++ locale if supported: */ +#if !defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(BOOST_REGEX_USE_C_LOCALE) && !defined(BOOST_REGEX_USE_CPP_LOCALE) && !defined(BOOST_NO_STD_LOCALE) +# define BOOST_REGEX_USE_CPP_LOCALE +#endif +/* otherwise use C+ locale: */ +#if !defined(BOOST_REGEX_USE_WIN32_LOCALE) && !defined(BOOST_REGEX_USE_C_LOCALE) && !defined(BOOST_REGEX_USE_CPP_LOCALE) +# define BOOST_REGEX_USE_C_LOCALE +#endif + +#ifndef BOOST_REGEX_MAX_STATE_COUNT +# define BOOST_REGEX_MAX_STATE_COUNT 100000000 +#endif + + +/***************************************************************************** + * + * Error Handling for exception free compilers: + * + ****************************************************************************/ + +#ifdef BOOST_NO_EXCEPTIONS +/* + * If there are no exceptions then we must report critical-errors + * the only way we know how; by terminating. + */ +#include +#include +#include + +# define BOOST_REGEX_NOEH_ASSERT(x)\ +if(0 == (x))\ +{\ + std::string s("Error: critical regex++ failure in: ");\ + s.append(#x);\ + std::runtime_error e(s);\ + cutl_details_boost::throw_exception(e);\ +} +#else +/* + * With exceptions then error handling is taken care of and + * there is no need for these checks: + */ +# define BOOST_REGEX_NOEH_ASSERT(x) +#endif + + +/***************************************************************************** + * + * Stack protection under MS Windows: + * + ****************************************************************************/ + +#if !defined(BOOST_REGEX_NO_W32) && !defined(BOOST_REGEX_V3) +# if(defined(_WIN32) || defined(_WIN64) || defined(_WINCE)) \ + && !defined(__GNUC__) \ + && !(defined(__BORLANDC__) && (__BORLANDC__ >= 0x600)) \ + && !(defined(__MWERKS__) && (__MWERKS__ <= 0x3003)) +# define BOOST_REGEX_HAS_MS_STACK_GUARD +# endif +#elif defined(BOOST_REGEX_HAS_MS_STACK_GUARD) +# undef BOOST_REGEX_HAS_MS_STACK_GUARD +#endif + +#if defined(__cplusplus) && defined(BOOST_REGEX_HAS_MS_STACK_GUARD) + +namespace cutl_details_boost{ +namespace re_detail{ + +BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page(); + +} +} + +#endif + + +/***************************************************************************** + * + * Algorithm selection and configuration: + * + ****************************************************************************/ + +#if !defined(BOOST_REGEX_RECURSIVE) && !defined(BOOST_REGEX_NON_RECURSIVE) +# if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && !defined(_STLP_DEBUG) && !defined(__STL_DEBUG) && !(defined(BOOST_MSVC) && (BOOST_MSVC >= 1400)) +# define BOOST_REGEX_RECURSIVE +# else +# define BOOST_REGEX_NON_RECURSIVE +# endif +#endif + +#ifdef BOOST_REGEX_NON_RECURSIVE +# ifdef BOOST_REGEX_RECURSIVE +# error "Can't set both BOOST_REGEX_RECURSIVE and BOOST_REGEX_NON_RECURSIVE" +# endif +# ifndef BOOST_REGEX_BLOCKSIZE +# define BOOST_REGEX_BLOCKSIZE 4096 +# endif +# if BOOST_REGEX_BLOCKSIZE < 512 +# error "BOOST_REGEX_BLOCKSIZE must be at least 512" +# endif +# ifndef BOOST_REGEX_MAX_BLOCKS +# define BOOST_REGEX_MAX_BLOCKS 1024 +# endif +# ifdef BOOST_REGEX_HAS_MS_STACK_GUARD +# undef BOOST_REGEX_HAS_MS_STACK_GUARD +# endif +# ifndef BOOST_REGEX_MAX_CACHE_BLOCKS +# define BOOST_REGEX_MAX_CACHE_BLOCKS 16 +# endif +#endif + + +/***************************************************************************** + * + * helper memory allocation functions: + * + ****************************************************************************/ + +#if defined(__cplusplus) && defined(BOOST_REGEX_NON_RECURSIVE) +namespace cutl_details_boost{ namespace re_detail{ + +BOOST_REGEX_DECL void* BOOST_REGEX_CALL get_mem_block(); +BOOST_REGEX_DECL void BOOST_REGEX_CALL put_mem_block(void*); + +}} /* namespaces */ +#endif + +/***************************************************************************** + * + * Diagnostics: + * + ****************************************************************************/ + +#ifdef BOOST_REGEX_CONFIG_INFO +BOOST_REGEX_DECL void BOOST_REGEX_CALL print_regex_library_info(); +#endif + +#if defined(BOOST_REGEX_DIAG) +# pragma message ("BOOST_REGEX_DECL" BOOST_STRINGIZE(=BOOST_REGEX_DECL)) +# pragma message ("BOOST_REGEX_CALL" BOOST_STRINGIZE(=BOOST_REGEX_CALL)) +# pragma message ("BOOST_REGEX_CCALL" BOOST_STRINGIZE(=BOOST_REGEX_CCALL)) +#ifdef BOOST_REGEX_USE_C_LOCALE +# pragma message ("Using C locale in regex traits class") +#elif BOOST_REGEX_USE_CPP_LOCALE +# pragma message ("Using C++ locale in regex traits class") +#else +# pragma message ("Using Win32 locale in regex traits class") +#endif +#if defined(BOOST_REGEX_DYN_LINK) || defined(BOOST_ALL_DYN_LINK) +# pragma message ("Dynamic linking enabled") +#endif +#if defined(BOOST_REGEX_NO_LIB) || defined(BOOST_ALL_NO_LIB) +# pragma message ("Auto-linking disabled") +#endif +#ifdef BOOST_REGEX_NO_EXTERNAL_TEMPLATES +# pragma message ("Extern templates disabled") +#endif + +#endif + +#endif + + + + diff --git a/cutl/details/boost/regex/config/cwchar.hpp b/cutl/details/boost/regex/config/cwchar.hpp new file mode 100644 index 0000000..73cdcce --- /dev/null +++ b/cutl/details/boost/regex/config/cwchar.hpp @@ -0,0 +1,207 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE boost/regex/config/cwchar.hpp + * VERSION see + * DESCRIPTION: regex wide character string fixes. + */ + +#ifndef BOOST_REGEX_CONFIG_CWCHAR_HPP +#define BOOST_REGEX_CONFIG_CWCHAR_HPP + +#include +#include +#include + +#if defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER) +// apparently this is required for the RW STL on Linux: +#undef iswalnum +#undef iswalpha +#undef iswblank +#undef iswcntrl +#undef iswdigit +#undef iswgraph +#undef iswlower +#undef iswprint +#undef iswprint +#undef iswpunct +#undef iswspace +#undef iswupper +#undef iswxdigit +#undef iswctype +#undef towlower +#undef towupper +#undef towctrans +#undef wctrans +#undef wctype +#endif + +namespace std{ + +#ifndef BOOST_NO_STDC_NAMESPACE +extern "C"{ +#endif + +#ifdef iswalnum +inline int (iswalnum)(wint_t i) +{ return iswalnum(i); } +#undef iswalnum +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswalnum; +#endif + +#ifdef iswalpha +inline int (iswalpha)(wint_t i) +{ return iswalpha(i); } +#undef iswalpha +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswalpha; +#endif + +#ifdef iswcntrl +inline int (iswcntrl)(wint_t i) +{ return iswcntrl(i); } +#undef iswcntrl +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswcntrl; +#endif + +#ifdef iswdigit +inline int (iswdigit)(wint_t i) +{ return iswdigit(i); } +#undef iswdigit +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswdigit; +#endif + +#ifdef iswgraph +inline int (iswgraph)(wint_t i) +{ return iswgraph(i); } +#undef iswgraph +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswgraph; +#endif + +#ifdef iswlower +inline int (iswlower)(wint_t i) +{ return iswlower(i); } +#undef iswlower +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswlower; +#endif + +#ifdef iswprint +inline int (iswprint)(wint_t i) +{ return iswprint(i); } +#undef iswprint +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswprint; +#endif + +#ifdef iswpunct +inline int (iswpunct)(wint_t i) +{ return iswpunct(i); } +#undef iswpunct +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswpunct; +#endif + +#ifdef iswspace +inline int (iswspace)(wint_t i) +{ return iswspace(i); } +#undef iswspace +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswspace; +#endif + +#ifdef iswupper +inline int (iswupper)(wint_t i) +{ return iswupper(i); } +#undef iswupper +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswupper; +#endif + +#ifdef iswxdigit +inline int (iswxdigit)(wint_t i) +{ return iswxdigit(i); } +#undef iswxdigit +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::iswxdigit; +#endif + +#ifdef towlower +inline wint_t (towlower)(wint_t i) +{ return towlower(i); } +#undef towlower +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::towlower; +#endif + +#ifdef towupper +inline wint_t (towupper)(wint_t i) +{ return towupper(i); } +#undef towupper +#elif defined(BOOST_NO_STDC_NAMESPACE) +using :: towupper; +#endif + +#ifdef wcscmp +inline int (wcscmp)(const wchar_t *p1, const wchar_t *p2) +{ return wcscmp(p1,p2); } +#undef wcscmp +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcscmp; +#endif + +#ifdef wcscoll +inline int (wcscoll)(const wchar_t *p1, const wchar_t *p2) +{ return wcscoll(p1,p2); } +#undef wcscoll +#elif defined(BOOST_NO_STDC_NAMESPACE) && !defined(UNDER_CE) +using ::wcscoll; +#endif + +#ifdef wcscpy +inline wchar_t *(wcscpy)(wchar_t *p1, const wchar_t *p2) +{ return wcscpy(p1,p2); } +#undef wcscpy +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcscpy; +#endif + +#ifdef wcslen +inline size_t (wcslen)(const wchar_t *p) +{ return wcslen(p); } +#undef wcslen +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcslen; +#endif + +#ifdef wcsxfrm +size_t wcsxfrm(wchar_t *p1, const wchar_t *p2, size_t s) +{ return wcsxfrm(p1,p2,s); } +#undef wcsxfrm +#elif defined(BOOST_NO_STDC_NAMESPACE) +using ::wcsxfrm; +#endif + + +#ifndef BOOST_NO_STDC_NAMESPACE +} // extern "C" +#endif + +} // namespace std + +#endif + diff --git a/cutl/details/boost/regex/icu.hpp b/cutl/details/boost/regex/icu.hpp new file mode 100644 index 0000000..02b6085 --- /dev/null +++ b/cutl/details/boost/regex/icu.hpp @@ -0,0 +1,1029 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE icu.hpp + * VERSION see + * DESCRIPTION: Unicode regular expressions on top of the ICU Library. + */ + +#ifndef BOOST_REGEX_ICU_HPP +#define BOOST_REGEX_ICU_HPP + +#include +#include +#include +#include +#include +#include +#include + +#ifdef BOOST_MSVC +#pragma warning (push) +#pragma warning (disable: 4251) +#endif + +namespace cutl_details_boost{ + +namespace re_detail{ + +// +// Implementation details: +// +class BOOST_REGEX_DECL icu_regex_traits_implementation +{ + typedef UChar32 char_type; + typedef std::size_t size_type; + typedef std::vector string_type; + typedef U_NAMESPACE_QUALIFIER Locale locale_type; + typedef cutl_details_boost::uint_least32_t char_class_type; +public: + icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& l) + : m_locale(l) + { + UErrorCode success = U_ZERO_ERROR; + m_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); + if(U_SUCCESS(success) == 0) + init_error(); + m_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::IDENTICAL); + success = U_ZERO_ERROR; + m_primary_collator.reset(U_NAMESPACE_QUALIFIER Collator::createInstance(l, success)); + if(U_SUCCESS(success) == 0) + init_error(); + m_primary_collator->setStrength(U_NAMESPACE_QUALIFIER Collator::PRIMARY); + } + U_NAMESPACE_QUALIFIER Locale getloc()const + { + return m_locale; + } + string_type do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const; + string_type transform(const char_type* p1, const char_type* p2) const + { + return do_transform(p1, p2, m_collator.get()); + } + string_type transform_primary(const char_type* p1, const char_type* p2) const + { + return do_transform(p1, p2, m_primary_collator.get()); + } +private: + void init_error() + { + std::runtime_error e("Could not initialize ICU resources"); + cutl_details_boost::throw_exception(e); + } + U_NAMESPACE_QUALIFIER Locale m_locale; // The ICU locale that we're using + cutl_details_boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_collator; // The full collation object + cutl_details_boost::scoped_ptr< U_NAMESPACE_QUALIFIER Collator> m_primary_collator; // The primary collation object +}; + +inline cutl_details_boost::shared_ptr get_icu_regex_traits_implementation(const U_NAMESPACE_QUALIFIER Locale& loc) +{ + return cutl_details_boost::shared_ptr(new icu_regex_traits_implementation(loc)); +} + +} + +class BOOST_REGEX_DECL icu_regex_traits +{ +public: + typedef UChar32 char_type; + typedef std::size_t size_type; + typedef std::vector string_type; + typedef U_NAMESPACE_QUALIFIER Locale locale_type; +#ifdef BOOST_NO_INT64_T + typedef std::bitset<64> char_class_type; +#else + typedef cutl_details_boost::uint64_t char_class_type; +#endif + + struct boost_extensions_tag{}; + + icu_regex_traits() + : m_pimpl(re_detail::get_icu_regex_traits_implementation(U_NAMESPACE_QUALIFIER Locale())) + { + } + static size_type length(const char_type* p); + + ::cutl_details_boost::regex_constants::syntax_type syntax_type(char_type c)const + { + return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_syntax_type(static_cast(c)) : regex_constants::syntax_char; + } + ::cutl_details_boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c) const + { + return ((c < 0x7f) && (c > 0)) ? re_detail::get_default_escape_syntax_type(static_cast(c)) : regex_constants::syntax_char; + } + char_type translate(char_type c) const + { + return c; + } + char_type translate_nocase(char_type c) const + { + return ::u_tolower(c); + } + char_type translate(char_type c, bool icase) const + { + return icase ? translate_nocase(c) : translate(c); + } + char_type tolower(char_type c) const + { + return ::u_tolower(c); + } + char_type toupper(char_type c) const + { + return ::u_toupper(c); + } + string_type transform(const char_type* p1, const char_type* p2) const + { + return m_pimpl->transform(p1, p2); + } + string_type transform_primary(const char_type* p1, const char_type* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const char_type* p1, const char_type* p2) const; + string_type lookup_collatename(const char_type* p1, const char_type* p2) const; + bool isctype(char_type c, char_class_type f) const; + int toi(const char_type*& p1, const char_type* p2, int radix)const + { + return re_detail::global_toi(p1, p2, radix, *this); + } + int value(char_type c, int radix)const + { + return u_digit(c, static_cast< ::int8_t>(radix)); + } + locale_type imbue(locale_type l) + { + locale_type result(m_pimpl->getloc()); + m_pimpl = re_detail::get_icu_regex_traits_implementation(l); + return result; + } + locale_type getloc()const + { + return locale_type(); + } + std::string error_string(::cutl_details_boost::regex_constants::error_type n) const + { + return re_detail::get_default_error_string(n); + } +private: + icu_regex_traits(const icu_regex_traits&); + icu_regex_traits& operator=(const icu_regex_traits&); + + // + // define the bitmasks offsets we need for additional character properties: + // + enum{ + offset_blank = U_CHAR_CATEGORY_COUNT, + offset_space = U_CHAR_CATEGORY_COUNT+1, + offset_xdigit = U_CHAR_CATEGORY_COUNT+2, + offset_underscore = U_CHAR_CATEGORY_COUNT+3, + offset_unicode = U_CHAR_CATEGORY_COUNT+4, + offset_any = U_CHAR_CATEGORY_COUNT+5, + offset_ascii = U_CHAR_CATEGORY_COUNT+6, + offset_horizontal = U_CHAR_CATEGORY_COUNT+7, + offset_vertical = U_CHAR_CATEGORY_COUNT+8 + }; + + // + // and now the masks: + // + static const char_class_type mask_blank; + static const char_class_type mask_space; + static const char_class_type mask_xdigit; + static const char_class_type mask_underscore; + static const char_class_type mask_unicode; + static const char_class_type mask_any; + static const char_class_type mask_ascii; + static const char_class_type mask_horizontal; + static const char_class_type mask_vertical; + + static char_class_type lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2); + + cutl_details_boost::shared_ptr< ::cutl_details_boost::re_detail::icu_regex_traits_implementation> m_pimpl; +}; + +} // namespace cutl_details_boost + +// +// template instances: +// +#define BOOST_REGEX_CHAR_T UChar32 +#undef BOOST_REGEX_TRAITS_T +#define BOOST_REGEX_TRAITS_T , icu_regex_traits +#define BOOST_REGEX_ICU_INSTANCES +#ifdef BOOST_REGEX_ICU_INSTANTIATE +# define BOOST_REGEX_INSTANTIATE +#endif +#include +#undef BOOST_REGEX_CHAR_T +#undef BOOST_REGEX_TRAITS_T +#undef BOOST_REGEX_ICU_INSTANCES +#ifdef BOOST_REGEX_INSTANTIATE +# undef BOOST_REGEX_INSTANTIATE +#endif + +namespace cutl_details_boost{ + +// types: +typedef basic_regex< ::UChar32, icu_regex_traits> u32regex; +typedef match_results u32match; +typedef match_results u16match; + +// +// Construction of 32-bit regex types from UTF-8 and UTF-16 primitives: +// +namespace re_detail{ + +#if !defined(BOOST_NO_MEMBER_TEMPLATES) && !defined(__IBMCPP__) +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + cutl_details_boost::regex_constants::syntax_option_type opt, + const cutl_details_boost::mpl::int_<1>*) +{ + typedef cutl_details_boost::u8_to_u32_iterator conv_type; + return u32regex(conv_type(i), conv_type(j), opt); +} + +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + cutl_details_boost::regex_constants::syntax_option_type opt, + const cutl_details_boost::mpl::int_<2>*) +{ + typedef cutl_details_boost::u16_to_u32_iterator conv_type; + return u32regex(conv_type(i), conv_type(j), opt); +} + +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + cutl_details_boost::regex_constants::syntax_option_type opt, + const cutl_details_boost::mpl::int_<4>*) +{ + return u32regex(i, j, opt); +} +#else +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + cutl_details_boost::regex_constants::syntax_option_type opt, + const cutl_details_boost::mpl::int_<1>*) +{ + typedef cutl_details_boost::u8_to_u32_iterator conv_type; + typedef std::vector vector_type; + vector_type v; + conv_type a(i), b(j); + while(a != b) + { + v.push_back(*a); + ++a; + } + if(v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast(0), static_cast(0), opt); +} + +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + cutl_details_boost::regex_constants::syntax_option_type opt, + const cutl_details_boost::mpl::int_<2>*) +{ + typedef cutl_details_boost::u16_to_u32_iterator conv_type; + typedef std::vector vector_type; + vector_type v; + conv_type a(i), b(j); + while(a != b) + { + v.push_back(*a); + ++a; + } + if(v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast(0), static_cast(0), opt); +} + +template +inline u32regex do_make_u32regex(InputIterator i, + InputIterator j, + cutl_details_boost::regex_constants::syntax_option_type opt, + const cutl_details_boost::mpl::int_<4>*) +{ + typedef std::vector vector_type; + vector_type v; + while(i != j) + { + v.push_back((UChar32)(*i)); + ++i; + } + if(v.size()) + return u32regex(&*v.begin(), v.size(), opt); + return u32regex(static_cast(0), static_cast(0), opt); +} +#endif +} + +// +// Construction from an iterator pair: +// +template +inline u32regex make_u32regex(InputIterator i, + InputIterator j, + cutl_details_boost::regex_constants::syntax_option_type opt) +{ + return re_detail::do_make_u32regex(i, j, opt, static_cast const*>(0)); +} +// +// construction from UTF-8 nul-terminated strings: +// +inline u32regex make_u32regex(const char* p, cutl_details_boost::regex_constants::syntax_option_type opt = cutl_details_boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(p, p + std::strlen(p), opt, static_cast const*>(0)); +} +inline u32regex make_u32regex(const unsigned char* p, cutl_details_boost::regex_constants::syntax_option_type opt = cutl_details_boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(p, p + std::strlen(reinterpret_cast(p)), opt, static_cast const*>(0)); +} +// +// construction from UTF-16 nul-terminated strings: +// +#ifndef BOOST_NO_WREGEX +inline u32regex make_u32regex(const wchar_t* p, cutl_details_boost::regex_constants::syntax_option_type opt = cutl_details_boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(p, p + std::wcslen(p), opt, static_cast const*>(0)); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +inline u32regex make_u32regex(const UChar* p, cutl_details_boost::regex_constants::syntax_option_type opt = cutl_details_boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(p, p + u_strlen(p), opt, static_cast const*>(0)); +} +#endif +// +// construction from basic_string class-template: +// +template +inline u32regex make_u32regex(const std::basic_string& s, cutl_details_boost::regex_constants::syntax_option_type opt = cutl_details_boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(s.begin(), s.end(), opt, static_cast const*>(0)); +} +// +// Construction from ICU string type: +// +inline u32regex make_u32regex(const U_NAMESPACE_QUALIFIER UnicodeString& s, cutl_details_boost::regex_constants::syntax_option_type opt = cutl_details_boost::regex_constants::perl) +{ + return re_detail::do_make_u32regex(s.getBuffer(), s.getBuffer() + s.length(), opt, static_cast const*>(0)); +} + +// +// regex_match overloads that widen the character type as appropriate: +// +namespace re_detail{ +template +void copy_results(MR1& out, MR2 const& in) +{ + // copy results from an adapted MR2 match_results: + out.set_size(in.size(), in.prefix().first.base(), in.suffix().second.base()); + out.set_base(in.base().base()); + for(int i = 0; i < (int)in.size(); ++i) + { + if(in[i].matched) + { + out.set_first(in[i].first.base(), i); + out.set_second(in[i].second.base(), i); + } + } +} + +template +inline bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + cutl_details_boost::mpl::int_<4> const*) +{ + return ::cutl_details_boost::regex_match(first, last, m, e, flags); +} +template +bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + cutl_details_boost::mpl::int_<2> const*) +{ + typedef u16_to_u32_iterator conv_type; + typedef match_results match_type; + typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::cutl_details_boost::regex_match(conv_type(first), conv_type(last), what, e, flags); + // copy results across to m: + if(result) copy_results(m, what); + return result; +} +template +bool do_regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + cutl_details_boost::mpl::int_<1> const*) +{ + typedef u8_to_u32_iterator conv_type; + typedef match_results match_type; + typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::cutl_details_boost::regex_match(conv_type(first), conv_type(last), what, e, flags); + // copy results across to m: + if(result) copy_results(m, what); + return result; +} +} // namespace re_detail + +template +inline bool u32regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(first, last, m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const UChar* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast const*>(0)); +} +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX) +inline bool u32regex_match(const wchar_t* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const unsigned char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const std::string& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_match(const std::wstring& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); +} +// +// regex_match overloads that do not return what matched: +// +template +inline bool u32regex_match(BidiIterator first, BidiIterator last, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_match(first, last, m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const UChar* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_match(p, p+u_strlen(p), m, e, flags, static_cast const*>(0)); +} +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX) +inline bool u32regex_match(const wchar_t* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_match(p, p+std::wcslen(p), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_match(p, p+std::strlen(p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const unsigned char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_match(p, p+std::strlen((const char*)p), m, e, flags, static_cast const*>(0)); +} +inline bool u32regex_match(const std::string& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_match(const std::wstring& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_match(s.begin(), s.end(), m, e, flags, static_cast const*>(0)); +} +#endif +inline bool u32regex_match(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_match(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, static_cast const*>(0)); +} + +// +// regex_search overloads that widen the character type as appropriate: +// +namespace re_detail{ +template +inline bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + cutl_details_boost::mpl::int_<4> const*) +{ + return ::cutl_details_boost::regex_search(first, last, m, e, flags, base); +} +template +bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + cutl_details_boost::mpl::int_<2> const*) +{ + typedef u16_to_u32_iterator conv_type; + typedef match_results match_type; + typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::cutl_details_boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base)); + // copy results across to m: + if(result) copy_results(m, what); + return result; +} +template +bool do_regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base, + cutl_details_boost::mpl::int_<1> const*) +{ + typedef u8_to_u32_iterator conv_type; + typedef match_results match_type; + typedef typename match_type::allocator_type alloc_type; + match_type what; + bool result = ::cutl_details_boost::regex_search(conv_type(first), conv_type(last), what, e, flags, conv_type(base)); + // copy results across to m: + if(result) copy_results(m, what); + return result; +} +} + +template +inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); +} +template +inline bool u32regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const u32regex& e, + match_flag_type flags, + BidiIterator base) +{ + return re_detail::do_regex_search(first, last, m, e, flags, base, static_cast const*>(0)); +} +inline bool u32regex_search(const UChar* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast const*>(0)); +} +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX) +inline bool u32regex_search(const wchar_t* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const unsigned char* p, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const std::string& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_search(const std::wstring& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, + match_results& m, + const u32regex& e, + match_flag_type flags = match_default) +{ + return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); +} +template +inline bool u32regex_search(BidiIterator first, BidiIterator last, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_search(first, last, m, e, flags, first, static_cast const*>(0)); +} +inline bool u32regex_search(const UChar* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_search(p, p+u_strlen(p), m, e, flags, p, static_cast const*>(0)); +} +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) && !defined(BOOST_NO_WREGEX) +inline bool u32regex_search(const wchar_t* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_search(p, p+std::wcslen(p), m, e, flags, p, static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_search(p, p+std::strlen(p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const unsigned char* p, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_search(p, p+std::strlen((const char*)p), m, e, flags, p, static_cast const*>(0)); +} +inline bool u32regex_search(const std::string& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#ifndef BOOST_NO_STD_WSTRING +inline bool u32regex_search(const std::wstring& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_search(s.begin(), s.end(), m, e, flags, s.begin(), static_cast const*>(0)); +} +#endif +inline bool u32regex_search(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return re_detail::do_regex_search(s.getBuffer(), s.getBuffer() + s.length(), m, e, flags, s.getBuffer(), static_cast const*>(0)); +} + +// +// overloads for regex_replace with utf-8 and utf-16 data types: +// +namespace re_detail{ +template +inline std::pair< cutl_details_boost::u8_to_u32_iterator, cutl_details_boost::u8_to_u32_iterator > + make_utf32_seq(I i, I j, mpl::int_<1> const*) +{ + return std::pair< cutl_details_boost::u8_to_u32_iterator, cutl_details_boost::u8_to_u32_iterator >(cutl_details_boost::u8_to_u32_iterator(i), cutl_details_boost::u8_to_u32_iterator(j)); +} +template +inline std::pair< cutl_details_boost::u16_to_u32_iterator, cutl_details_boost::u16_to_u32_iterator > + make_utf32_seq(I i, I j, mpl::int_<2> const*) +{ + return std::pair< cutl_details_boost::u16_to_u32_iterator, cutl_details_boost::u16_to_u32_iterator >(cutl_details_boost::u16_to_u32_iterator(i), cutl_details_boost::u16_to_u32_iterator(j)); +} +template +inline std::pair< I, I > + make_utf32_seq(I i, I j, mpl::int_<4> const*) +{ + return std::pair< I, I >(i, j); +} +template +inline std::pair< cutl_details_boost::u8_to_u32_iterator, cutl_details_boost::u8_to_u32_iterator > + make_utf32_seq(const charT* p, mpl::int_<1> const*) +{ + return std::pair< cutl_details_boost::u8_to_u32_iterator, cutl_details_boost::u8_to_u32_iterator >(cutl_details_boost::u8_to_u32_iterator(p), cutl_details_boost::u8_to_u32_iterator(p+std::strlen((const char*)p))); +} +template +inline std::pair< cutl_details_boost::u16_to_u32_iterator, cutl_details_boost::u16_to_u32_iterator > + make_utf32_seq(const charT* p, mpl::int_<2> const*) +{ + return std::pair< cutl_details_boost::u16_to_u32_iterator, cutl_details_boost::u16_to_u32_iterator >(cutl_details_boost::u16_to_u32_iterator(p), cutl_details_boost::u16_to_u32_iterator(p+u_strlen((const UChar*)p))); +} +template +inline std::pair< const charT*, const charT* > + make_utf32_seq(const charT* p, mpl::int_<4> const*) +{ + return std::pair< const charT*, const charT* >(p, p+icu_regex_traits::length((UChar32 const*)p)); +} +template +inline OutputIterator make_utf32_out(OutputIterator o, mpl::int_<4> const*) +{ + return o; +} +template +inline utf16_output_iterator make_utf32_out(OutputIterator o, mpl::int_<2> const*) +{ + return o; +} +template +inline utf8_output_iterator make_utf32_out(OutputIterator o, mpl::int_<1> const*) +{ + return o; +} + +template +OutputIterator do_regex_replace(OutputIterator out, + std::pair const& in, + const u32regex& e, + const std::pair& fmt, + match_flag_type flags + ) +{ + // unfortunately we have to copy the format string in order to pass in onward: + std::vector f; +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + f.assign(fmt.first, fmt.second); +#else + f.clear(); + I2 pos = fmt.first; + while(pos != fmt.second) + f.push_back(*pos++); +#endif + + regex_iterator i(in.first, in.second, e, flags); + regex_iterator j; + if(i == j) + { + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(in.first, in.second, out); + } + else + { + I1 last_m = in.first; + while(i != j) + { + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(i->prefix().first, i->prefix().second, out); + if(f.size()) + out = ::cutl_details_boost::re_detail::regex_format_imp(out, *i, &*f.begin(), &*f.begin() + f.size(), flags, e.get_traits()); + else + out = ::cutl_details_boost::re_detail::regex_format_imp(out, *i, static_cast(0), static_cast(0), flags, e.get_traits()); + last_m = (*i)[0].second; + if(flags & regex_constants::format_first_only) + break; + ++i; + } + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(last_m, in.second, out); + } + return out; +} +template +inline const BaseIterator& extract_output_base(const BaseIterator& b) +{ + return b; +} +template +inline BaseIterator extract_output_base(const utf8_output_iterator& b) +{ + return b.base(); +} +template +inline BaseIterator extract_output_base(const utf16_output_iterator& b) +{ + return b.base(); +} +} // re_detail + +template +inline OutputIterator u32regex_replace(OutputIterator out, + BidirectionalIterator first, + BidirectionalIterator last, + const u32regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + return re_detail::extract_output_base +#if BOOST_WORKAROUND(BOOST_MSVC, <= 1300) + +#endif + ( + re_detail::do_regex_replace( + re_detail::make_utf32_out(out, static_cast const*>(0)), + re_detail::make_utf32_seq(first, last, static_cast const*>(0)), + e, + re_detail::make_utf32_seq(fmt, static_cast const*>(0)), + flags) + ); +} + +template +inline OutputIterator u32regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const u32regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + return re_detail::extract_output_base +#if BOOST_WORKAROUND(BOOST_MSVC, <= 1300) + +#endif + ( + re_detail::do_regex_replace( + re_detail::make_utf32_out(out, static_cast const*>(0)), + re_detail::make_utf32_seq(first, last, static_cast const*>(0)), + e, + re_detail::make_utf32_seq(fmt.begin(), fmt.end(), static_cast const*>(0)), + flags) + ); +} + +template +inline OutputIterator u32regex_replace(OutputIterator out, + Iterator first, + Iterator last, + const u32regex& e, + const U_NAMESPACE_QUALIFIER UnicodeString& fmt, + match_flag_type flags = match_default) +{ + return re_detail::extract_output_base +#if BOOST_WORKAROUND(BOOST_MSVC, <= 1300) + +#endif + ( + re_detail::do_regex_replace( + re_detail::make_utf32_out(out, static_cast const*>(0)), + re_detail::make_utf32_seq(first, last, static_cast const*>(0)), + e, + re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast const*>(0)), + flags) + ); +} + +template +std::basic_string u32regex_replace(const std::basic_string& s, + const u32regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + re_detail::string_out_iterator > i(result); + u32regex_replace(i, s.begin(), s.end(), e, fmt, flags); + return result; +} + +template +std::basic_string u32regex_replace(const std::basic_string& s, + const u32regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + re_detail::string_out_iterator > i(result); + u32regex_replace(i, s.begin(), s.end(), e, fmt.c_str(), flags); + return result; +} + +namespace re_detail{ + +class unicode_string_out_iterator +{ + U_NAMESPACE_QUALIFIER UnicodeString* out; +public: + unicode_string_out_iterator(U_NAMESPACE_QUALIFIER UnicodeString& s) : out(&s) {} + unicode_string_out_iterator& operator++() { return *this; } + unicode_string_out_iterator& operator++(int) { return *this; } + unicode_string_out_iterator& operator*() { return *this; } + unicode_string_out_iterator& operator=(UChar v) + { + *out += v; + return *this; + } + typedef std::ptrdiff_t difference_type; + typedef UChar value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::output_iterator_tag iterator_category; +}; + +} + +inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + const UChar* fmt, + match_flag_type flags = match_default) +{ + U_NAMESPACE_QUALIFIER UnicodeString result; + re_detail::unicode_string_out_iterator i(result); + u32regex_replace(i, s.getBuffer(), s.getBuffer()+s.length(), e, fmt, flags); + return result; +} + +inline U_NAMESPACE_QUALIFIER UnicodeString u32regex_replace(const U_NAMESPACE_QUALIFIER UnicodeString& s, + const u32regex& e, + const U_NAMESPACE_QUALIFIER UnicodeString& fmt, + match_flag_type flags = match_default) +{ + U_NAMESPACE_QUALIFIER UnicodeString result; + re_detail::unicode_string_out_iterator i(result); + re_detail::do_regex_replace( + re_detail::make_utf32_out(i, static_cast const*>(0)), + re_detail::make_utf32_seq(s.getBuffer(), s.getBuffer()+s.length(), static_cast const*>(0)), + e, + re_detail::make_utf32_seq(fmt.getBuffer(), fmt.getBuffer() + fmt.length(), static_cast const*>(0)), + flags); + return result; +} + +} // namespace cutl_details_boost. + +#ifdef BOOST_MSVC +#pragma warning (pop) +#endif + +#include +#include + +#endif diff --git a/cutl/details/boost/regex/pattern_except.hpp b/cutl/details/boost/regex/pattern_except.hpp new file mode 100644 index 0000000..b81c8e1 --- /dev/null +++ b/cutl/details/boost/regex/pattern_except.hpp @@ -0,0 +1,100 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE pattern_except.hpp + * VERSION see + * DESCRIPTION: Declares pattern-matching exception classes. + */ + +#ifndef BOOST_RE_PAT_EXCEPT_HPP +#define BOOST_RE_PAT_EXCEPT_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +#include +#include +#include + +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable : 4275) +#endif +class BOOST_REGEX_DECL regex_error : public std::runtime_error +{ +public: + explicit regex_error(const std::string& s, regex_constants::error_type err = regex_constants::error_unknown, std::ptrdiff_t pos = 0); + explicit regex_error(regex_constants::error_type err); + ~regex_error() throw(); + regex_constants::error_type code()const + { return m_error_code; } + std::ptrdiff_t position()const + { return m_position; } + void raise()const; +private: + regex_constants::error_type m_error_code; + std::ptrdiff_t m_position; +}; + +typedef regex_error bad_pattern; +typedef regex_error bad_expression; + +namespace re_detail{ + +BOOST_REGEX_DECL void BOOST_REGEX_CALL raise_runtime_error(const std::runtime_error& ex); + +template +void raise_error(const traits& t, regex_constants::error_type code) +{ + (void)t; // warning suppression + std::runtime_error e(t.error_string(code)); + ::cutl_details_boost::re_detail::raise_runtime_error(e); +} + +} + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#endif + + + diff --git a/cutl/details/boost/regex/pending/object_cache.hpp b/cutl/details/boost/regex/pending/object_cache.hpp new file mode 100644 index 0000000..7a213a9 --- /dev/null +++ b/cutl/details/boost/regex/pending/object_cache.hpp @@ -0,0 +1,165 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE object_cache.hpp + * VERSION see + * DESCRIPTION: Implements a generic object cache. + */ + +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#define BOOST_REGEX_OBJECT_CACHE_HPP + +#include +#include +#include +#include +#include +#include +#ifdef BOOST_HAS_THREADS +#include +#endif + +namespace cutl_details_boost{ + +template +class object_cache +{ +public: + typedef std::pair< ::cutl_details_boost::shared_ptr, Key const*> value_type; + typedef std::list list_type; + typedef typename list_type::iterator list_iterator; + typedef std::map map_type; + typedef typename map_type::iterator map_iterator; + typedef typename list_type::size_type size_type; + static cutl_details_boost::shared_ptr get(const Key& k, size_type l_max_cache_size); + +private: + static cutl_details_boost::shared_ptr do_get(const Key& k, size_type l_max_cache_size); + + struct data + { + list_type cont; + map_type index; + }; + + // Needed by compilers not implementing the resolution to DR45. For reference, + // see http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45. + friend struct data; +}; + +template +cutl_details_boost::shared_ptr object_cache::get(const Key& k, size_type l_max_cache_size) +{ +#ifdef BOOST_HAS_THREADS + static cutl_details_boost::static_mutex mut = BOOST_STATIC_MUTEX_INIT; + + cutl_details_boost::static_mutex::scoped_lock l(mut); + if(l) + { + return do_get(k, l_max_cache_size); + } + // + // what do we do if the lock fails? + // for now just throw, but we should never really get here... + // + ::cutl_details_boost::throw_exception(std::runtime_error("Error in thread safety code: could not acquire a lock")); +#ifdef BOOST_NO_UNREACHABLE_RETURN_DETECTION + return cutl_details_boost::shared_ptr(); +#endif +#else + return do_get(k, l_max_cache_size); +#endif +} + +template +cutl_details_boost::shared_ptr object_cache::do_get(const Key& k, size_type l_max_cache_size) +{ + typedef typename object_cache::data object_data; + typedef typename map_type::size_type map_size_type; + static object_data s_data; + + // + // see if the object is already in the cache: + // + map_iterator mpos = s_data.index.find(k); + if(mpos != s_data.index.end()) + { + // + // Eureka! + // We have a cached item, bump it up the list and return it: + // + if(--(s_data.cont.end()) != mpos->second) + { + // splice out the item we want to move: + list_type temp; + temp.splice(temp.end(), s_data.cont, mpos->second); + // and now place it at the end of the list: + s_data.cont.splice(s_data.cont.end(), temp, temp.begin()); + BOOST_ASSERT(*(s_data.cont.back().second) == k); + // update index with new position: + mpos->second = --(s_data.cont.end()); + BOOST_ASSERT(&(mpos->first) == mpos->second->second); + BOOST_ASSERT(&(mpos->first) == s_data.cont.back().second); + } + return s_data.cont.back().first; + } + // + // if we get here then the item is not in the cache, + // so create it: + // + cutl_details_boost::shared_ptr result(new Object(k)); + // + // Add it to the list, and index it: + // + s_data.cont.push_back(value_type(result, static_cast(0))); + s_data.index.insert(std::make_pair(k, --(s_data.cont.end()))); + s_data.cont.back().second = &(s_data.index.find(k)->first); + map_size_type s = s_data.index.size(); + BOOST_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_ASSERT(s_data.index.find(k)->first == k); + if(s > l_max_cache_size) + { + // + // We have too many items in the list, so we need to start + // popping them off the back of the list, but only if they're + // being held uniquely by us: + // + list_iterator pos = s_data.cont.begin(); + list_iterator last = s_data.cont.end(); + while((pos != last) && (s > l_max_cache_size)) + { + if(pos->first.unique()) + { + list_iterator condemmed(pos); + ++pos; + // now remove the items from our containers, + // then order has to be as follows: + BOOST_ASSERT(s_data.index.find(*(condemmed->second)) != s_data.index.end()); + s_data.index.erase(*(condemmed->second)); + s_data.cont.erase(condemmed); + --s; + } + else + --pos; + } + BOOST_ASSERT(s_data.index[k]->first.get() == result.get()); + BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); + BOOST_ASSERT(s_data.index.find(k)->first == k); + } + return result; +} + +} + +#endif diff --git a/cutl/details/boost/regex/pending/static_mutex.hpp b/cutl/details/boost/regex/pending/static_mutex.hpp new file mode 100644 index 0000000..8413a56 --- /dev/null +++ b/cutl/details/boost/regex/pending/static_mutex.hpp @@ -0,0 +1,180 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE static_mutex.hpp + * VERSION see + * DESCRIPTION: Declares static_mutex lock type, there are three different + * implementations: POSIX pthreads, WIN32 threads, and portable, + * these are described in more detail below. + */ + +#ifndef BOOST_REGEX_STATIC_MUTEX_HPP +#define BOOST_REGEX_STATIC_MUTEX_HPP + +#include +#include // dll import/export options. + +#ifdef BOOST_HAS_PTHREADS +#include +#endif + +#if defined(BOOST_HAS_PTHREADS) && defined(PTHREAD_MUTEX_INITIALIZER) +// +// pthreads version: +// simple wrap around a pthread_mutex_t initialized with +// PTHREAD_MUTEX_INITIALIZER. +// +namespace cutl_details_boost{ + +class BOOST_REGEX_DECL scoped_static_mutex_lock; + +class static_mutex +{ +public: + typedef scoped_static_mutex_lock scoped_lock; + pthread_mutex_t m_mutex; +}; + +#define BOOST_STATIC_MUTEX_INIT { PTHREAD_MUTEX_INITIALIZER, } + +class BOOST_REGEX_DECL scoped_static_mutex_lock +{ +public: + scoped_static_mutex_lock(static_mutex& mut, bool lk = true); + ~scoped_static_mutex_lock(); + inline bool locked()const + { + return m_have_lock; + } + inline operator void const*()const + { + return locked() ? this : 0; + } + void lock(); + void unlock(); +private: + static_mutex& m_mutex; + bool m_have_lock; +}; + + +} // namespace cutl_details_boost +#elif defined(BOOST_HAS_WINTHREADS) +// +// Win32 version: +// Use a 32-bit int as a lock, along with a test-and-set +// implementation using InterlockedCompareExchange. +// + +#include + +namespace cutl_details_boost{ + +class BOOST_REGEX_DECL scoped_static_mutex_lock; + +class static_mutex +{ +public: + typedef scoped_static_mutex_lock scoped_lock; + cutl_details_boost::int32_t m_mutex; +}; + +#define BOOST_STATIC_MUTEX_INIT { 0, } + +class BOOST_REGEX_DECL scoped_static_mutex_lock +{ +public: + scoped_static_mutex_lock(static_mutex& mut, bool lk = true); + ~scoped_static_mutex_lock(); + operator void const*()const + { + return locked() ? this : 0; + } + bool locked()const + { + return m_have_lock; + } + void lock(); + void unlock(); +private: + static_mutex& m_mutex; + bool m_have_lock; + scoped_static_mutex_lock(const scoped_static_mutex_lock&); + scoped_static_mutex_lock& operator=(const scoped_static_mutex_lock&); +}; + +} // namespace + +#else +// +// Portable version of a static mutex based on Boost.Thread library: +// This has to use a single mutex shared by all instances of static_mutex +// because cutl_details_boost::call_once doesn't alow us to pass instance information +// down to the initialisation proceedure. In fact the initialisation routine +// may need to be called more than once - but only once per instance. +// +// Since this preprocessor path is almost never taken, we hide these header +// dependencies so that build tools don't find them. +// +#define B1 +#define B2 +#include B1 +#include B2 +#undef B1 +#undef B2 + +namespace cutl_details_boost{ + +class BOOST_REGEX_DECL scoped_static_mutex_lock; +extern "C" BOOST_REGEX_DECL void cutl_details_boost_regex_free_static_mutex(); + +class BOOST_REGEX_DECL static_mutex +{ +public: + typedef scoped_static_mutex_lock scoped_lock; + static void init(); + static cutl_details_boost::recursive_mutex* m_pmutex; + static cutl_details_boost::once_flag m_once; +}; + +#define BOOST_STATIC_MUTEX_INIT { } + +class BOOST_REGEX_DECL scoped_static_mutex_lock +{ +public: + scoped_static_mutex_lock(static_mutex& mut, bool lk = true); + ~scoped_static_mutex_lock(); + operator void const*()const; + bool locked()const; + void lock(); + void unlock(); +private: + cutl_details_boost::recursive_mutex::scoped_lock* m_plock; + bool m_have_lock; +}; + +inline scoped_static_mutex_lock::operator void const*()const +{ + return locked() ? this : 0; +} + +inline bool scoped_static_mutex_lock::locked()const +{ + return m_have_lock; +} + +} // namespace + +#endif + +#endif diff --git a/cutl/details/boost/regex/pending/unicode_iterator.hpp b/cutl/details/boost/regex/pending/unicode_iterator.hpp new file mode 100644 index 0000000..2ab88c3 --- /dev/null +++ b/cutl/details/boost/regex/pending/unicode_iterator.hpp @@ -0,0 +1,692 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE unicode_iterator.hpp + * VERSION see + * DESCRIPTION: Iterator adapters for converting between different Unicode encodings. + */ + +/**************************************************************************** + +Contents: +~~~~~~~~~ + +1) Read Only, Input Adapters: +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +template +class u32_to_u8_iterator; + +Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8. + +template +class u8_to_u32_iterator; + +Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32. + +template +class u32_to_u16_iterator; + +Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16. + +template +class u16_to_u32_iterator; + +Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32. + +2) Single pass output iterator adapters: + +template +class utf8_output_iterator; + +Accepts UTF-32 code points and forwards them on as UTF-8 code points. + +template +class utf16_output_iterator; + +Accepts UTF-32 code points and forwards them on as UTF-16 code points. + +****************************************************************************/ + +#ifndef BOOST_REGEX_UNICODE_ITERATOR_HPP +#define BOOST_REGEX_UNICODE_ITERATOR_HPP +#include +#include +#include +#include +#include +#include +#ifndef BOOST_NO_STD_LOCALE +#include +#include +#endif +#include // CHAR_BIT + +namespace cutl_details_boost{ + +namespace detail{ + +static const ::cutl_details_boost::uint16_t high_surrogate_base = 0xD7C0u; +static const ::cutl_details_boost::uint16_t low_surrogate_base = 0xDC00u; +static const ::cutl_details_boost::uint32_t ten_bit_mask = 0x3FFu; + +inline bool is_high_surrogate(::cutl_details_boost::uint16_t v) +{ + return (v & 0xFC00u) == 0xd800u; +} +inline bool is_low_surrogate(::cutl_details_boost::uint16_t v) +{ + return (v & 0xFC00u) == 0xdc00u; +} +template +inline bool is_surrogate(T v) +{ + return (v & 0xF800u) == 0xd800; +} + +inline unsigned utf8_byte_count(cutl_details_boost::uint8_t c) +{ + // if the most significant bit with a zero in it is in position + // 8-N then there are N bytes in this UTF-8 sequence: + cutl_details_boost::uint8_t mask = 0x80u; + unsigned result = 0; + while(c & mask) + { + ++result; + mask >>= 1; + } + return (result == 0) ? 1 : ((result > 4) ? 4 : result); +} + +inline unsigned utf8_trailing_byte_count(cutl_details_boost::uint8_t c) +{ + return utf8_byte_count(c) - 1; +} + +inline void invalid_utf32_code_point(::cutl_details_boost::uint32_t val) +{ +#ifndef BOOST_NO_STD_LOCALE + std::stringstream ss; + ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence"; + std::out_of_range e(ss.str()); +#else + std::out_of_range e("Invalid UTF-32 code point encountered while trying to encode UTF-16 sequence"); +#endif + cutl_details_boost::throw_exception(e); +} + + +} // namespace detail + +template +class u32_to_u16_iterator + : public cutl_details_boost::iterator_facade, U16Type, std::bidirectional_iterator_tag, const U16Type> +{ + typedef cutl_details_boost::iterator_facade, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type; + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + typedef typename std::iterator_traits::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); + BOOST_STATIC_ASSERT(sizeof(U16Type)*CHAR_BIT == 16); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_current == 2) + extract_current(); + return m_values[m_current]; + } + bool equal(const u32_to_u16_iterator& that)const + { + if(m_position == that.m_position) + { + // Both m_currents must be equal, or both even + // this is the same as saying their sum must be even: + return (m_current + that.m_current) & 1u ? false : true; + } + return false; + } + void increment() + { + // if we have a pending read then read now, so that we know whether + // to skip a position, or move to a low-surrogate: + if(m_current == 2) + { + // pending read: + extract_current(); + } + // move to the next surrogate position: + ++m_current; + // if we've reached the end skip a position: + if(m_values[m_current] == 0) + { + m_current = 2; + ++m_position; + } + } + void decrement() + { + if(m_current != 1) + { + // decrementing an iterator always leads to a valid position: + --m_position; + extract_current(); + m_current = m_values[1] ? 1 : 0; + } + else + { + m_current = 0; + } + } + BaseIterator base()const + { + return m_position; + } + // construct: + u32_to_u16_iterator() : m_position(), m_current(0) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + } + u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + } +private: + + void extract_current()const + { + // begin by checking for a code point out of range: + ::cutl_details_boost::uint32_t v = *m_position; + if(v >= 0x10000u) + { + if(v > 0x10FFFFu) + detail::invalid_utf32_code_point(*m_position); + // split into two surrogates: + m_values[0] = static_cast(v >> 10) + detail::high_surrogate_base; + m_values[1] = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; + m_current = 0; + BOOST_ASSERT(detail::is_high_surrogate(m_values[0])); + BOOST_ASSERT(detail::is_low_surrogate(m_values[1])); + } + else + { + // 16-bit code point: + m_values[0] = static_cast(*m_position); + m_values[1] = 0; + m_current = 0; + // value must not be a surrogate: + if(detail::is_surrogate(m_values[0])) + detail::invalid_utf32_code_point(*m_position); + } + } + BaseIterator m_position; + mutable U16Type m_values[3]; + mutable unsigned m_current; +}; + +template +class u16_to_u32_iterator + : public cutl_details_boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> +{ + typedef cutl_details_boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type; + // special values for pending iterator reads: + BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu); + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + typedef typename std::iterator_traits::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 16); + BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_value == pending_read) + extract_current(); + return m_value; + } + bool equal(const u16_to_u32_iterator& that)const + { + return m_position == that.m_position; + } + void increment() + { + // skip high surrogate first if there is one: + if(detail::is_high_surrogate(*m_position)) ++m_position; + ++m_position; + m_value = pending_read; + } + void decrement() + { + --m_position; + // if we have a low surrogate then go back one more: + if(detail::is_low_surrogate(*m_position)) + --m_position; + m_value = pending_read; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u16_to_u32_iterator() : m_position() + { + m_value = pending_read; + } + u16_to_u32_iterator(BaseIterator b) : m_position(b) + { + m_value = pending_read; + } +private: + static void invalid_code_point(::cutl_details_boost::uint16_t val) + { +#ifndef BOOST_NO_STD_LOCALE + std::stringstream ss; + ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence"; + std::out_of_range e(ss.str()); +#else + std::out_of_range e("Misplaced UTF-16 surrogate encountered while trying to encode UTF-32 sequence"); +#endif + cutl_details_boost::throw_exception(e); + } + void extract_current()const + { + m_value = static_cast(static_cast< ::cutl_details_boost::uint16_t>(*m_position)); + // if the last value is a high surrogate then adjust m_position and m_value as needed: + if(detail::is_high_surrogate(*m_position)) + { + // precondition; next value must have be a low-surrogate: + BaseIterator next(m_position); + ::cutl_details_boost::uint16_t t = *++next; + if((t & 0xFC00u) != 0xDC00u) + invalid_code_point(t); + m_value = (m_value - detail::high_surrogate_base) << 10; + m_value |= (static_cast(static_cast< ::cutl_details_boost::uint16_t>(t)) & detail::ten_bit_mask); + } + // postcondition; result must not be a surrogate: + if(detail::is_surrogate(m_value)) + invalid_code_point(static_cast< ::cutl_details_boost::uint16_t>(m_value)); + } + BaseIterator m_position; + mutable U32Type m_value; +}; + +template +class u32_to_u8_iterator + : public cutl_details_boost::iterator_facade, U8Type, std::bidirectional_iterator_tag, const U8Type> +{ + typedef cutl_details_boost::iterator_facade, U8Type, std::bidirectional_iterator_tag, const U8Type> base_type; + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + typedef typename std::iterator_traits::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); + BOOST_STATIC_ASSERT(sizeof(U8Type)*CHAR_BIT == 8); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_current == 4) + extract_current(); + return m_values[m_current]; + } + bool equal(const u32_to_u8_iterator& that)const + { + if(m_position == that.m_position) + { + // either the m_current's must be equal, or one must be 0 and + // the other 4: which means neither must have bits 1 or 2 set: + return (m_current == that.m_current) + || (((m_current | that.m_current) & 3) == 0); + } + return false; + } + void increment() + { + // if we have a pending read then read now, so that we know whether + // to skip a position, or move to a low-surrogate: + if(m_current == 4) + { + // pending read: + extract_current(); + } + // move to the next surrogate position: + ++m_current; + // if we've reached the end skip a position: + if(m_values[m_current] == 0) + { + m_current = 4; + ++m_position; + } + } + void decrement() + { + if((m_current & 3) == 0) + { + --m_position; + extract_current(); + m_current = 3; + while(m_current && (m_values[m_current] == 0)) + --m_current; + } + else + --m_current; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u32_to_u8_iterator() : m_position(), m_current(0) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + m_values[3] = 0; + m_values[4] = 0; + } + u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4) + { + m_values[0] = 0; + m_values[1] = 0; + m_values[2] = 0; + m_values[3] = 0; + m_values[4] = 0; + } +private: + + void extract_current()const + { + cutl_details_boost::uint32_t c = *m_position; + if(c > 0x10FFFFu) + detail::invalid_utf32_code_point(c); + if(c < 0x80u) + { + m_values[0] = static_cast(c); + m_values[1] = static_cast(0u); + m_values[2] = static_cast(0u); + m_values[3] = static_cast(0u); + } + else if(c < 0x800u) + { + m_values[0] = static_cast(0xC0u + (c >> 6)); + m_values[1] = static_cast(0x80u + (c & 0x3Fu)); + m_values[2] = static_cast(0u); + m_values[3] = static_cast(0u); + } + else if(c < 0x10000u) + { + m_values[0] = static_cast(0xE0u + (c >> 12)); + m_values[1] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + m_values[2] = static_cast(0x80u + (c & 0x3Fu)); + m_values[3] = static_cast(0u); + } + else + { + m_values[0] = static_cast(0xF0u + (c >> 18)); + m_values[1] = static_cast(0x80u + ((c >> 12) & 0x3Fu)); + m_values[2] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + m_values[3] = static_cast(0x80u + (c & 0x3Fu)); + } + m_current= 0; + } + BaseIterator m_position; + mutable U8Type m_values[5]; + mutable unsigned m_current; +}; + +template +class u8_to_u32_iterator + : public cutl_details_boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> +{ + typedef cutl_details_boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type; + // special values for pending iterator reads: + BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu); + +#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + typedef typename std::iterator_traits::value_type base_value_type; + + BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 8); + BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32); +#endif + +public: + typename base_type::reference + dereference()const + { + if(m_value == pending_read) + extract_current(); + return m_value; + } + bool equal(const u8_to_u32_iterator& that)const + { + return m_position == that.m_position; + } + void increment() + { + // skip high surrogate first if there is one: + unsigned c = detail::utf8_byte_count(*m_position); + std::advance(m_position, c); + m_value = pending_read; + } + void decrement() + { + // Keep backtracking until we don't have a trailing character: + unsigned count = 0; + while((*--m_position & 0xC0u) == 0x80u) ++count; + // now check that the sequence was valid: + if(count != detail::utf8_trailing_byte_count(*m_position)) + invalid_sequnce(); + m_value = pending_read; + } + BaseIterator base()const + { + return m_position; + } + // construct: + u8_to_u32_iterator() : m_position() + { + m_value = pending_read; + } + u8_to_u32_iterator(BaseIterator b) : m_position(b) + { + m_value = pending_read; + } +private: + static void invalid_sequnce() + { + std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character"); + cutl_details_boost::throw_exception(e); + } + void extract_current()const + { + m_value = static_cast(static_cast< ::cutl_details_boost::uint8_t>(*m_position)); + // we must not have a continuation character: + if((m_value & 0xC0u) == 0x80u) + invalid_sequnce(); + // see how many extra byts we have: + unsigned extra = detail::utf8_trailing_byte_count(*m_position); + // extract the extra bits, 6 from each extra byte: + BaseIterator next(m_position); + for(unsigned c = 0; c < extra; ++c) + { + ++next; + m_value <<= 6; + m_value += static_cast(*next) & 0x3Fu; + } + // we now need to remove a few of the leftmost bits, but how many depends + // upon how many extra bytes we've extracted: + static const cutl_details_boost::uint32_t masks[4] = + { + 0x7Fu, + 0x7FFu, + 0xFFFFu, + 0x1FFFFFu, + }; + m_value &= masks[extra]; + // check the result: + if(m_value > static_cast(0x10FFFFu)) + invalid_sequnce(); + } + BaseIterator m_position; + mutable U32Type m_value; +}; + +template +class utf16_output_iterator +{ +public: + typedef void difference_type; + typedef void value_type; + typedef cutl_details_boost::uint32_t* pointer; + typedef cutl_details_boost::uint32_t& reference; + typedef std::output_iterator_tag iterator_category; + + utf16_output_iterator(const BaseIterator& b) + : m_position(b){} + utf16_output_iterator(const utf16_output_iterator& that) + : m_position(that.m_position){} + utf16_output_iterator& operator=(const utf16_output_iterator& that) + { + m_position = that.m_position; + return *this; + } + const utf16_output_iterator& operator*()const + { + return *this; + } + void operator=(cutl_details_boost::uint32_t val)const + { + push(val); + } + utf16_output_iterator& operator++() + { + return *this; + } + utf16_output_iterator& operator++(int) + { + return *this; + } + BaseIterator base()const + { + return m_position; + } +private: + void push(cutl_details_boost::uint32_t v)const + { + if(v >= 0x10000u) + { + // begin by checking for a code point out of range: + if(v > 0x10FFFFu) + detail::invalid_utf32_code_point(v); + // split into two surrogates: + *m_position++ = static_cast(v >> 10) + detail::high_surrogate_base; + *m_position++ = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; + } + else + { + // 16-bit code point: + // value must not be a surrogate: + if(detail::is_surrogate(v)) + detail::invalid_utf32_code_point(v); + *m_position++ = static_cast(v); + } + } + mutable BaseIterator m_position; +}; + +template +class utf8_output_iterator +{ +public: + typedef void difference_type; + typedef void value_type; + typedef cutl_details_boost::uint32_t* pointer; + typedef cutl_details_boost::uint32_t& reference; + typedef std::output_iterator_tag iterator_category; + + utf8_output_iterator(const BaseIterator& b) + : m_position(b){} + utf8_output_iterator(const utf8_output_iterator& that) + : m_position(that.m_position){} + utf8_output_iterator& operator=(const utf8_output_iterator& that) + { + m_position = that.m_position; + return *this; + } + const utf8_output_iterator& operator*()const + { + return *this; + } + void operator=(cutl_details_boost::uint32_t val)const + { + push(val); + } + utf8_output_iterator& operator++() + { + return *this; + } + utf8_output_iterator& operator++(int) + { + return *this; + } + BaseIterator base()const + { + return m_position; + } +private: + void push(cutl_details_boost::uint32_t c)const + { + if(c > 0x10FFFFu) + detail::invalid_utf32_code_point(c); + if(c < 0x80u) + { + *m_position++ = static_cast(c); + } + else if(c < 0x800u) + { + *m_position++ = static_cast(0xC0u + (c >> 6)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + else if(c < 0x10000u) + { + *m_position++ = static_cast(0xE0u + (c >> 12)); + *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + else + { + *m_position++ = static_cast(0xF0u + (c >> 18)); + *m_position++ = static_cast(0x80u + ((c >> 12) & 0x3Fu)); + *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); + *m_position++ = static_cast(0x80u + (c & 0x3Fu)); + } + } + mutable BaseIterator m_position; +}; + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_UNICODE_ITERATOR_HPP + diff --git a/cutl/details/boost/regex/regex_traits.hpp b/cutl/details/boost/regex/regex_traits.hpp new file mode 100644 index 0000000..df0769b --- /dev/null +++ b/cutl/details/boost/regex/regex_traits.hpp @@ -0,0 +1,35 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits classes. + */ + +#ifndef BOOST_REGEX_TRAITS_HPP +#define BOOST_REGEX_TRAITS_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +# include +#endif + +# ifndef BOOST_REGEX_TRAITS_HPP_INCLUDED +# include +# endif + +#endif // include + + + + + diff --git a/cutl/details/boost/regex/src/c_regex_traits.cxx b/cutl/details/boost/regex/src/c_regex_traits.cxx new file mode 100644 index 0000000..b0f3a47 --- /dev/null +++ b/cutl/details/boost/regex/src/c_regex_traits.cxx @@ -0,0 +1,212 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: c_regex_traits.cpp + * VERSION: see + * DESCRIPTION: Implements out of line c_regex_traits members + */ + + +#define BOOST_REGEX_SOURCE + +#include +#include + +#if !BOOST_WORKAROUND(__BORLANDC__, < 0x560) + +#include +#include +#include + +#ifdef BOOST_NO_STDC_NAMESPACE +namespace std{ + using ::strxfrm; using ::isspace; + using ::ispunct; using ::isalpha; + using ::isalnum; using ::iscntrl; + using ::isprint; using ::isupper; + using ::islower; using ::isdigit; + using ::isxdigit; using ::strtol; +} +#endif + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +namespace cutl_details_boost{ + +c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform(const char* p1, const char* p2) +{ + std::string result(10, ' '); + std::size_t s = result.size(); + std::size_t r; + std::string src(p1, p2); + while(s < (r = std::strxfrm(&*result.begin(), src.c_str(), s))) + { + result.append(r - s + 3, ' '); + s = result.size(); + } + result.erase(r); + return result; +} + +c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform_primary(const char* p1, const char* p2) +{ + static char s_delim; + static const int s_collate_type = ::cutl_details_boost::re_detail::find_sort_syntax(static_cast*>(0), &s_delim); + std::string result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(s_collate_type) + { + case ::cutl_details_boost::re_detail::sort_C: + case ::cutl_details_boost::re_detail::sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + for(std::string::size_type i = 0; i < result.size(); ++i) + result[i] = static_cast((std::tolower)(static_cast(result[i]))); + result = transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case ::cutl_details_boost::re_detail::sort_fixed: + { + // get a regular sort key, and then truncate it: + result = transform(p1, p2); + result.erase(s_delim); + break; + } + case ::cutl_details_boost::re_detail::sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result = transform(p1, p2); + if(result.size() && (result[0] == s_delim)) + break; + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == s_delim) + break; + } + result.erase(i); + break; + } + if(result.empty()) + result = std::string(1, char(0)); + return result; +} + +enum +{ + char_class_space=1<<0, + char_class_print=1<<1, + char_class_cntrl=1<<2, + char_class_upper=1<<3, + char_class_lower=1<<4, + char_class_alpha=1<<5, + char_class_digit=1<<6, + char_class_punct=1<<7, + char_class_xdigit=1<<8, + char_class_alnum=char_class_alpha|char_class_digit, + char_class_graph=char_class_alnum|char_class_punct, + char_class_blank=1<<9, + char_class_word=1<<10, + char_class_unicode=1<<11, + char_class_horizontal=1<<12, + char_class_vertical=1<<13 +}; + +c_regex_traits::char_class_type BOOST_REGEX_CALL c_regex_traits::lookup_classname(const char* p1, const char* p2) +{ + static const char_class_type masks[] = + { + 0, + char_class_alnum, + char_class_alpha, + char_class_blank, + char_class_cntrl, + char_class_digit, + char_class_digit, + char_class_graph, + char_class_horizontal, + char_class_lower, + char_class_lower, + char_class_print, + char_class_punct, + char_class_space, + char_class_space, + char_class_upper, + char_class_unicode, + char_class_upper, + char_class_vertical, + char_class_alnum | char_class_word, + char_class_alnum | char_class_word, + char_class_xdigit, + }; + + int idx = ::cutl_details_boost::re_detail::get_default_class_id(p1, p2); + if(idx < 0) + { + std::string s(p1, p2); + for(std::string::size_type i = 0; i < s.size(); ++i) + s[i] = static_cast((std::tolower)(static_cast(s[i]))); + idx = ::cutl_details_boost::re_detail::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + } + BOOST_ASSERT(std::size_t(idx+1) < sizeof(masks) / sizeof(masks[0])); + return masks[idx+1]; +} + +bool BOOST_REGEX_CALL c_regex_traits::isctype(char c, char_class_type mask) +{ + return + ((mask & char_class_space) && (std::isspace)(static_cast(c))) + || ((mask & char_class_print) && (std::isprint)(static_cast(c))) + || ((mask & char_class_cntrl) && (std::iscntrl)(static_cast(c))) + || ((mask & char_class_upper) && (std::isupper)(static_cast(c))) + || ((mask & char_class_lower) && (std::islower)(static_cast(c))) + || ((mask & char_class_alpha) && (std::isalpha)(static_cast(c))) + || ((mask & char_class_digit) && (std::isdigit)(static_cast(c))) + || ((mask & char_class_punct) && (std::ispunct)(static_cast(c))) + || ((mask & char_class_xdigit) && (std::isxdigit)(static_cast(c))) + || ((mask & char_class_blank) && (std::isspace)(static_cast(c)) && !::cutl_details_boost::re_detail::is_separator(c)) + || ((mask & char_class_word) && (c == '_')) + || ((mask & char_class_vertical) && (::cutl_details_boost::re_detail::is_separator(c) || (c == '\v'))) + || ((mask & char_class_horizontal) && (std::isspace)(static_cast(c)) && !::cutl_details_boost::re_detail::is_separator(c) && (c != '\v')); +} + +c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::lookup_collatename(const char* p1, const char* p2) +{ + std::string s(p1, p2); + s = ::cutl_details_boost::re_detail::lookup_default_collate_name(s); + if(s.empty() && (p2-p1 == 1)) + s.append(1, *p1); + return s; +} + +int BOOST_REGEX_CALL c_regex_traits::value(char c, int radix) +{ + char b[2] = { c, '\0', }; + char* ep; + int result = std::strtol(b, &ep, radix); + if(ep == b) + return -1; + return result; +} + +} +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + +#endif diff --git a/cutl/details/boost/regex/src/cpp_regex_traits.cxx b/cutl/details/boost/regex/src/cpp_regex_traits.cxx new file mode 100644 index 0000000..7124191 --- /dev/null +++ b/cutl/details/boost/regex/src/cpp_regex_traits.cxx @@ -0,0 +1,117 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE cpp_regex_traits.cpp + * VERSION see + * DESCRIPTION: Implements cpp_regex_traits (and associated helper classes). + */ + +#define BOOST_REGEX_SOURCE +#include +#ifndef BOOST_NO_STD_LOCALE +#include +#include + +#ifdef BOOST_NO_STDC_NAMESPACE +namespace std{ + using ::memset; +} +#endif + +namespace cutl_details_boost{ namespace re_detail{ + +void cpp_regex_traits_char_layer::init() +{ + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + std::memset(m_char_map, 0, sizeof(m_char_map)); +#ifndef BOOST_NO_STD_MESSAGES +#ifndef __IBMCPP__ + std::messages::catalog cat = static_cast::catalog>(-1); +#else + std::messages::catalog cat = reinterpret_cast::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits::get_catalog_name()); + if(cat_name.size() && (m_pmessages != 0)) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + cutl_details_boost::re_detail::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if((int)cat >= 0) + { +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = this->m_pmessages->get(cat, 0, i, get_default_syntax(i)); + for(string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[static_cast(mss[j])] = i; + } + } + this->m_pmessages->close(cat); +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + this->m_pmessages->close(cat); + throw; + } +#endif + } + else + { +#endif + for(regex_constants::syntax_type j = 1; j < regex_constants::syntax_max; ++j) + { + const char* ptr = get_default_syntax(j); + while(ptr && *ptr) + { + m_char_map[static_cast(*ptr)] = j; + ++ptr; + } + } +#ifndef BOOST_NO_STD_MESSAGES + } +#endif + // + // finish off by calculating our escape types: + // + unsigned char i = 'A'; + do + { + if(m_char_map[i] == 0) + { + if(this->m_pctype->is(std::ctype_base::lower, i)) + m_char_map[i] = regex_constants::escape_type_class; + else if(this->m_pctype->is(std::ctype_base::upper, i)) + m_char_map[i] = regex_constants::escape_type_not_class; + } + }while(0xFF != i++); +} + +} // re_detail +} // boost +#endif + diff --git a/cutl/details/boost/regex/src/cregex.cxx b/cutl/details/boost/regex/src/cregex.cxx new file mode 100644 index 0000000..cf8154f --- /dev/null +++ b/cutl/details/boost/regex/src/cregex.cxx @@ -0,0 +1,647 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: cregex.cpp + * VERSION: see + * DESCRIPTION: Implements high level class cutl_details_boost::RexEx + */ + + +#define BOOST_REGEX_SOURCE + +#include +#include +#if !defined(BOOST_NO_STD_STRING) +#include +#include +#include +typedef cutl_details_boost::match_flag_type match_flag_type; +#include + +#ifdef BOOST_MSVC +#pragma warning(disable:4309) +#endif +#ifdef BOOST_INTEL +#pragma warning(disable:981 383) +#endif + +namespace cutl_details_boost{ + +#ifdef __BORLANDC__ +#if __BORLANDC__ < 0x530 +// +// we need to instantiate the vector classes we use +// since declaring a reference to type doesn't seem to +// do the job... +std::vector inst1; +std::vector inst2; +#endif +#endif + +namespace{ + +template +std::string to_string(iterator i, iterator j) +{ + std::string s; + while(i != j) + { + s.append(1, *i); + ++i; + } + return s; +} + +inline std::string to_string(const char* i, const char* j) +{ + return std::string(i, j); +} + +} +namespace re_detail{ + +class RegExData +{ +public: + enum type + { + type_pc, + type_pf, + type_copy + }; + regex e; + cmatch m; +#ifndef BOOST_REGEX_NO_FILEITER + match_results fm; +#endif + type t; + const char* pbase; +#ifndef BOOST_REGEX_NO_FILEITER + mapfile::iterator fbase; +#endif + std::map > strings; + std::map > positions; + void update(); + void clean(); + RegExData() : e(), m(), +#ifndef BOOST_REGEX_NO_FILEITER + fm(), +#endif + t(type_copy), pbase(0), +#ifndef BOOST_REGEX_NO_FILEITER + fbase(), +#endif + strings(), positions() {} +}; + +void RegExData::update() +{ + strings.erase(strings.begin(), strings.end()); + positions.erase(positions.begin(), positions.end()); + if(t == type_pc) + { + for(unsigned int i = 0; i < m.size(); ++i) + { + if(m[i].matched) strings[i] = std::string(m[i].first, m[i].second); + positions[i] = m[i].matched ? m[i].first - pbase : -1; + } + } +#ifndef BOOST_REGEX_NO_FILEITER + else + { + for(unsigned int i = 0; i < fm.size(); ++i) + { + if(fm[i].matched) strings[i] = to_string(fm[i].first, fm[i].second); + positions[i] = fm[i].matched ? fm[i].first - fbase : -1; + } + } +#endif + t = type_copy; +} + +void RegExData::clean() +{ +#ifndef BOOST_REGEX_NO_FILEITER + fbase = mapfile::iterator(); + fm = match_results(); +#endif +} + +} // namespace + +RegEx::RegEx() +{ + pdata = new re_detail::RegExData(); +} + +RegEx::RegEx(const RegEx& o) +{ + pdata = new re_detail::RegExData(*(o.pdata)); +} + +RegEx::~RegEx() +{ + delete pdata; +} + +RegEx::RegEx(const char* c, bool icase) +{ + pdata = new re_detail::RegExData(); + SetExpression(c, icase); +} + +RegEx::RegEx(const std::string& s, bool icase) +{ + pdata = new re_detail::RegExData(); + SetExpression(s.c_str(), icase); +} + +RegEx& RegEx::operator=(const RegEx& o) +{ + *pdata = *(o.pdata); + return *this; +} + +RegEx& RegEx::operator=(const char* p) +{ + SetExpression(p, false); + return *this; +} + +unsigned int RegEx::SetExpression(const char* p, bool icase) +{ + cutl_details_boost::uint_fast32_t f = icase ? regex::normal | regex::icase : regex::normal; + return pdata->e.set_expression(p, f); +} + +unsigned int RegEx::error_code()const +{ + return pdata->e.error_code(); +} + + +std::string RegEx::Expression()const +{ + return pdata->e.expression(); +} + +// +// now matching operators: +// +bool RegEx::Match(const char* p, match_flag_type flags) +{ + pdata->t = re_detail::RegExData::type_pc; + pdata->pbase = p; + const char* end = p; + while(*end)++end; + + if(regex_match(p, end, pdata->m, pdata->e, flags)) + { + pdata->update(); + return true; + } + return false; +} + +bool RegEx::Search(const char* p, match_flag_type flags) +{ + pdata->t = re_detail::RegExData::type_pc; + pdata->pbase = p; + const char* end = p; + while(*end)++end; + + if(regex_search(p, end, pdata->m, pdata->e, flags)) + { + pdata->update(); + return true; + } + return false; +} +namespace re_detail{ +struct pred1 +{ + GrepCallback cb; + RegEx* pe; + pred1(GrepCallback c, RegEx* i) : cb(c), pe(i) {} + bool operator()(const cmatch& m) + { + pe->pdata->m = m; + return cb(*pe); + } +}; +} +unsigned int RegEx::Grep(GrepCallback cb, const char* p, match_flag_type flags) +{ + pdata->t = re_detail::RegExData::type_pc; + pdata->pbase = p; + const char* end = p; + while(*end)++end; + + unsigned int result = regex_grep(re_detail::pred1(cb, this), p, end, pdata->e, flags); + if(result) + pdata->update(); + return result; +} +namespace re_detail{ +struct pred2 +{ + std::vector& v; + RegEx* pe; + pred2(std::vector& o, RegEx* e) : v(o), pe(e) {} + bool operator()(const cmatch& m) + { + pe->pdata->m = m; + v.push_back(std::string(m[0].first, m[0].second)); + return true; + } +private: + pred2& operator=(const pred2&); +}; +} + +unsigned int RegEx::Grep(std::vector& v, const char* p, match_flag_type flags) +{ + pdata->t = re_detail::RegExData::type_pc; + pdata->pbase = p; + const char* end = p; + while(*end)++end; + + unsigned int result = regex_grep(re_detail::pred2(v, this), p, end, pdata->e, flags); + if(result) + pdata->update(); + return result; +} +namespace re_detail{ +struct pred3 +{ + std::vector& v; + const char* base; + RegEx* pe; + pred3(std::vector& o, const char* pb, RegEx* p) : v(o), base(pb), pe(p) {} + bool operator()(const cmatch& m) + { + pe->pdata->m = m; + v.push_back(static_cast(m[0].first - base)); + return true; + } +private: + pred3& operator=(const pred3&); +}; +} +unsigned int RegEx::Grep(std::vector& v, const char* p, match_flag_type flags) +{ + pdata->t = re_detail::RegExData::type_pc; + pdata->pbase = p; + const char* end = p; + while(*end)++end; + + unsigned int result = regex_grep(re_detail::pred3(v, p, this), p, end, pdata->e, flags); + if(result) + pdata->update(); + return result; +} +#ifndef BOOST_REGEX_NO_FILEITER +namespace re_detail{ +struct pred4 +{ + GrepFileCallback cb; + RegEx* pe; + const char* file; + bool ok; + pred4(GrepFileCallback c, RegEx* i, const char* f) : cb(c), pe(i), file(f), ok(true) {} + bool operator()(const match_results& m) + { + pe->pdata->t = RegExData::type_pf; + pe->pdata->fm = m; + pe->pdata->update(); + ok = cb(file, *pe); + return ok; + } +}; +} +namespace{ +void BuildFileList(std::list* pl, const char* files, bool recurse) +{ + file_iterator start(files); + file_iterator end; + if(recurse) + { + // go through sub directories: + char buf[MAX_PATH]; + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(buf, MAX_PATH, start.root())); + if(*buf == 0) + { + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(buf, MAX_PATH, ".")); + re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf, MAX_PATH, directory_iterator::separator())); + re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf, MAX_PATH, "*")); + } + else + { + re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf, MAX_PATH, directory_iterator::separator())); + re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf, MAX_PATH, "*")); + } + directory_iterator dstart(buf); + directory_iterator dend; + + // now get the file mask bit of "files": + const char* ptr = files; + while(*ptr) ++ptr; + while((ptr != files) && (*ptr != *directory_iterator::separator()) && (*ptr != '/'))--ptr; + if(ptr != files) ++ptr; + + while(dstart != dend) + { +#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE) + (::sprintf_s)(buf, sizeof(buf), "%s%s%s", dstart.path(), directory_iterator::separator(), ptr); +#else + (std::sprintf)(buf, "%s%s%s", dstart.path(), directory_iterator::separator(), ptr); +#endif + BuildFileList(pl, buf, recurse); + ++dstart; + } + } + while(start != end) + { + pl->push_back(*start); + ++start; + } +} +} + +unsigned int RegEx::GrepFiles(GrepFileCallback cb, const char* files, bool recurse, match_flag_type flags) +{ + unsigned int result = 0; + std::list file_list; + BuildFileList(&file_list, files, recurse); + std::list::iterator start, end; + start = file_list.begin(); + end = file_list.end(); + + while(start != end) + { + mapfile map((*start).c_str()); + pdata->t = re_detail::RegExData::type_pf; + pdata->fbase = map.begin(); + re_detail::pred4 pred(cb, this, (*start).c_str()); + int r = regex_grep(pred, map.begin(), map.end(), pdata->e, flags); + result += r; + ++start; + pdata->clean(); + if(pred.ok == false) + return result; + } + + return result; +} + + +unsigned int RegEx::FindFiles(FindFilesCallback cb, const char* files, bool recurse, match_flag_type flags) +{ + unsigned int result = 0; + std::list file_list; + BuildFileList(&file_list, files, recurse); + std::list::iterator start, end; + start = file_list.begin(); + end = file_list.end(); + + while(start != end) + { + mapfile map((*start).c_str()); + pdata->t = re_detail::RegExData::type_pf; + pdata->fbase = map.begin(); + + if(regex_search(map.begin(), map.end(), pdata->fm, pdata->e, flags)) + { + ++result; + if(false == cb((*start).c_str())) + return result; + } + //pdata->update(); + ++start; + //pdata->clean(); + } + + return result; +} +#endif + +#ifdef BOOST_REGEX_V3 +#define regex_replace regex_merge +#endif + +std::string RegEx::Merge(const std::string& in, const std::string& fmt, + bool copy, match_flag_type flags) +{ + std::string result; + re_detail::string_out_iterator i(result); + if(!copy) flags |= format_no_copy; + regex_replace(i, in.begin(), in.end(), pdata->e, fmt.c_str(), flags); + return result; +} + +std::string RegEx::Merge(const char* in, const char* fmt, + bool copy, match_flag_type flags) +{ + std::string result; + if(!copy) flags |= format_no_copy; + re_detail::string_out_iterator i(result); + regex_replace(i, in, in + std::strlen(in), pdata->e, fmt, flags); + return result; +} + +std::size_t RegEx::Split(std::vector& v, + std::string& s, + match_flag_type flags, + unsigned max_count) +{ + return regex_split(std::back_inserter(v), s, pdata->e, flags, max_count); +} + + + +// +// now operators for returning what matched in more detail: +// +std::size_t RegEx::Position(int i)const +{ + switch(pdata->t) + { + case re_detail::RegExData::type_pc: + return pdata->m[i].matched ? pdata->m[i].first - pdata->pbase : RegEx::npos; +#ifndef BOOST_REGEX_NO_FILEITER + case re_detail::RegExData::type_pf: + return pdata->fm[i].matched ? pdata->fm[i].first - pdata->fbase : RegEx::npos; +#endif + case re_detail::RegExData::type_copy: + { + std::map >::iterator pos = pdata->positions.find(i); + if(pos == pdata->positions.end()) + return RegEx::npos; + return (*pos).second; + } + } + return RegEx::npos; +} + +std::size_t RegEx::Marks()const +{ + return pdata->e.mark_count(); +} + + +std::size_t RegEx::Length(int i)const +{ + switch(pdata->t) + { + case re_detail::RegExData::type_pc: + return pdata->m[i].matched ? pdata->m[i].second - pdata->m[i].first : RegEx::npos; +#ifndef BOOST_REGEX_NO_FILEITER + case re_detail::RegExData::type_pf: + return pdata->fm[i].matched ? pdata->fm[i].second - pdata->fm[i].first : RegEx::npos; +#endif + case re_detail::RegExData::type_copy: + { + std::map >::iterator pos = pdata->strings.find(i); + if(pos == pdata->strings.end()) + return RegEx::npos; + return (*pos).second.size(); + } + } + return RegEx::npos; +} + +bool RegEx::Matched(int i)const +{ + switch(pdata->t) + { + case re_detail::RegExData::type_pc: + return pdata->m[i].matched; +#ifndef BOOST_REGEX_NO_FILEITER + case re_detail::RegExData::type_pf: + return pdata->fm[i].matched; +#endif + case re_detail::RegExData::type_copy: + { + std::map >::iterator pos = pdata->strings.find(i); + if(pos == pdata->strings.end()) + return false; + return true; + } + } + return false; +} + + +std::string RegEx::What(int i)const +{ + std::string result; + switch(pdata->t) + { + case re_detail::RegExData::type_pc: + if(pdata->m[i].matched) + result.assign(pdata->m[i].first, pdata->m[i].second); + break; + case re_detail::RegExData::type_pf: + if(pdata->m[i].matched) + result.assign(to_string(pdata->m[i].first, pdata->m[i].second)); + break; + case re_detail::RegExData::type_copy: + { + std::map >::iterator pos = pdata->strings.find(i); + if(pos != pdata->strings.end()) + result = (*pos).second; + break; + } + } + return result; +} + +const std::size_t RegEx::npos = ~static_cast(0); + +} // namespace cutl_details_boost + +#if defined(__BORLANDC__) && (__BORLANDC__ >= 0x550) && (__BORLANDC__ <= 0x551) && !defined(_RWSTD_COMPILE_INSTANTIATE) +// +// this is an ugly hack to work around an ugly problem: +// by default this file will produce unresolved externals during +// linking unless _RWSTD_COMPILE_INSTANTIATE is defined (Borland bug). +// However if _RWSTD_COMPILE_INSTANTIATE is defined then we get separate +// copies of basic_string's static data in the RTL and this DLL, this messes +// with basic_string's memory management and results in run-time crashes, +// Oh sweet joy of Catch 22.... +// +namespace std{ +template<> template<> +basic_string& BOOST_REGEX_DECL +basic_string::replace(char* f1, char* f2, const char* i1, const char* i2) +{ + unsigned insert_pos = f1 - begin(); + unsigned remove_len = f2 - f1; + unsigned insert_len = i2 - i1; + unsigned org_size = size(); + if(insert_len > remove_len) + { + append(insert_len-remove_len, ' '); + std::copy_backward(begin() + insert_pos + remove_len, begin() + org_size, end()); + std::copy(i1, i2, begin() + insert_pos); + } + else + { + std::copy(begin() + insert_pos + remove_len, begin() + org_size, begin() + insert_pos + insert_len); + std::copy(i1, i2, begin() + insert_pos); + erase(size() + insert_len - remove_len); + } + return *this; +} +template<> template<> +basic_string& BOOST_REGEX_DECL +basic_string::replace(wchar_t* f1, wchar_t* f2, const wchar_t* i1, const wchar_t* i2) +{ + unsigned insert_pos = f1 - begin(); + unsigned remove_len = f2 - f1; + unsigned insert_len = i2 - i1; + unsigned org_size = size(); + if(insert_len > remove_len) + { + append(insert_len-remove_len, ' '); + std::copy_backward(begin() + insert_pos + remove_len, begin() + org_size, end()); + std::copy(i1, i2, begin() + insert_pos); + } + else + { + std::copy(begin() + insert_pos + remove_len, begin() + org_size, begin() + insert_pos + insert_len); + std::copy(i1, i2, begin() + insert_pos); + erase(size() + insert_len - remove_len); + } + return *this; +} +} // namespace std +#endif + +#endif + + + + + + + + + + + + + + + + diff --git a/cutl/details/boost/regex/src/fileiter.cxx b/cutl/details/boost/regex/src/fileiter.cxx new file mode 100644 index 0000000..0a669a8 --- /dev/null +++ b/cutl/details/boost/regex/src/fileiter.cxx @@ -0,0 +1,919 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: fileiter.cpp + * VERSION: see + * DESCRIPTION: Implements file io primitives + directory searching for class cutl_details_boost::RegEx. + */ + + +#define BOOST_REGEX_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#include +#if defined(BOOST_NO_STDC_NAMESPACE) +namespace std{ + using ::sprintf; + using ::fseek; + using ::fread; + using ::ftell; + using ::fopen; + using ::fclose; + using ::FILE; + using ::strcpy; + using ::strcpy; + using ::strcat; + using ::strcmp; + using ::strlen; +} +#endif + + +#ifndef BOOST_REGEX_NO_FILEITER + +#if defined(__CYGWIN__) || defined(__CYGWIN32__) +#include +#endif + +#ifdef BOOST_MSVC +# pragma warning(disable: 4800) +#endif + +namespace cutl_details_boost{ + namespace re_detail{ +// start with the operating system specific stuff: + +#if (defined(__BORLANDC__) || defined(BOOST_REGEX_FI_WIN32_DIR) || defined(BOOST_MSVC)) && !defined(BOOST_RE_NO_WIN32) + +// platform is DOS or Windows +// directories are separated with '\\' +// and names are insensitive of case + +BOOST_REGEX_DECL const char* _fi_sep = "\\"; +const char* _fi_sep_alt = "/"; +#define BOOST_REGEX_FI_TRANSLATE(c) std::tolower(c) + +#else + +// platform is not DOS or Windows +// directories are separated with '/' +// and names are sensitive of case + +BOOST_REGEX_DECL const char* _fi_sep = "/"; +const char* _fi_sep_alt = _fi_sep; +#define BOOST_REGEX_FI_TRANSLATE(c) c + +#endif + +#ifdef BOOST_REGEX_FI_WIN32_MAP + +void mapfile::open(const char* file) +{ +#if defined(BOOST_NO_ANSI_APIS) + int filename_size = strlen(file); + LPWSTR wide_file = (LPWSTR)_alloca( (filename_size + 1) * sizeof(WCHAR) ); + if(::MultiByteToWideChar(CP_ACP, 0, file, filename_size, wide_file, filename_size + 1) == 0) + hfile = INVALID_HANDLE_VALUE; + else + hfile = CreateFileW(wide_file, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); +#elif defined(__CYGWIN__)||defined(__CYGWIN32__) + char win32file[ MAX_PATH ]; + cygwin_conv_to_win32_path( file, win32file ); + hfile = CreateFileA(win32file, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); +#else + hfile = CreateFileA(file, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); +#endif + if(hfile != INVALID_HANDLE_VALUE) + { + hmap = CreateFileMapping(hfile, 0, PAGE_READONLY, 0, 0, 0); + if((hmap == INVALID_HANDLE_VALUE) || (hmap == NULL)) + { + CloseHandle(hfile); + hmap = 0; + hfile = 0; + std::runtime_error err("Unable to create file mapping."); + cutl_details_boost::re_detail::raise_runtime_error(err); + } + _first = static_cast(MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0)); + if(_first == 0) + { + CloseHandle(hmap); + CloseHandle(hfile); + hmap = 0; + hfile = 0; + std::runtime_error err("Unable to create file mapping."); + } + _last = _first + GetFileSize(hfile, 0); + } + else + { + hfile = 0; +#ifndef BOOST_NO_EXCEPTIONS + throw std::runtime_error("Unable to open file."); +#else + BOOST_REGEX_NOEH_ASSERT(hfile != INVALID_HANDLE_VALUE); +#endif + } +} + +void mapfile::close() +{ + if(hfile != INVALID_HANDLE_VALUE) + { + UnmapViewOfFile((void*)_first); + CloseHandle(hmap); + CloseHandle(hfile); + hmap = hfile = 0; + _first = _last = 0; + } +} + +#elif !defined(BOOST_RE_NO_STL) + +mapfile_iterator& mapfile_iterator::operator = (const mapfile_iterator& i) +{ + if(file && node) + file->unlock(node); + file = i.file; + node = i.node; + offset = i.offset; + if(file) + file->lock(node); + return *this; +} + +mapfile_iterator& mapfile_iterator::operator++ () +{ + if((++offset == mapfile::buf_size) && file) + { + ++node; + offset = 0; + file->lock(node); + file->unlock(node-1); + } + return *this; +} + +mapfile_iterator mapfile_iterator::operator++ (int) +{ + mapfile_iterator temp(*this); + if((++offset == mapfile::buf_size) && file) + { + ++node; + offset = 0; + file->lock(node); + file->unlock(node-1); + } + return temp; +} + +mapfile_iterator& mapfile_iterator::operator-- () +{ + if((offset == 0) && file) + { + --node; + offset = mapfile::buf_size - 1; + file->lock(node); + file->unlock(node + 1); + } + else + --offset; + return *this; +} + +mapfile_iterator mapfile_iterator::operator-- (int) +{ + mapfile_iterator temp(*this); + if((offset == 0) && file) + { + --node; + offset = mapfile::buf_size - 1; + file->lock(node); + file->unlock(node + 1); + } + else + --offset; + return temp; +} + +mapfile_iterator operator + (const mapfile_iterator& i, long off) +{ + mapfile_iterator temp(i); + temp += off; + return temp; +} + +mapfile_iterator operator - (const mapfile_iterator& i, long off) +{ + mapfile_iterator temp(i); + temp -= off; + return temp; +} + +mapfile::iterator mapfile::begin()const +{ + return mapfile_iterator(this, 0); +} + +mapfile::iterator mapfile::end()const +{ + return mapfile_iterator(this, _size); +} + +void mapfile::lock(pointer* node)const +{ + BOOST_ASSERT(node >= _first); + BOOST_ASSERT(node <= _last); + if(node < _last) + { + if(*node == 0) + { + if(condemed.empty()) + { + *node = new char[sizeof(int) + buf_size]; + *(reinterpret_cast(*node)) = 1; + } + else + { + pointer* p = condemed.front(); + condemed.pop_front(); + *node = *p; + *p = 0; + *(reinterpret_cast(*node)) = 1; + } + + std::size_t read_size = 0; + int read_pos = std::fseek(hfile, (node - _first) * buf_size, SEEK_SET); + + if(0 == read_pos && node == _last - 1) + read_size = std::fread(*node + sizeof(int), _size % buf_size, 1, hfile); + else + read_size = std::fread(*node + sizeof(int), buf_size, 1, hfile); +#ifndef BOOST_NO_EXCEPTIONS + if((read_size == 0) || (std::ferror(hfile))) + { + throw std::runtime_error("Unable to read file."); + } +#else + BOOST_REGEX_NOEH_ASSERT((0 == std::ferror(hfile)) && (read_size != 0)); +#endif + } + else + { + if(*reinterpret_cast(*node) == 0) + { + *reinterpret_cast(*node) = 1; + condemed.remove(node); + } + else + ++(*reinterpret_cast(*node)); + } + } +} + +void mapfile::unlock(pointer* node)const +{ + BOOST_ASSERT(node >= _first); + BOOST_ASSERT(node <= _last); + if(node < _last) + { + if(--(*reinterpret_cast(*node)) == 0) + { + condemed.push_back(node); + } + } +} + +long int get_file_length(std::FILE* hfile) +{ + long int result; + std::fseek(hfile, 0, SEEK_END); + result = std::ftell(hfile); + std::fseek(hfile, 0, SEEK_SET); + return result; +} + + +void mapfile::open(const char* file) +{ + hfile = std::fopen(file, "rb"); +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + if(hfile != 0) + { + _size = get_file_length(hfile); + long cnodes = (_size + buf_size - 1) / buf_size; + + // check that number of nodes is not too high: + if(cnodes > (long)((INT_MAX) / sizeof(pointer*))) + { + std::fclose(hfile); + hfile = 0; + _size = 0; + return; + } + + _first = new pointer[(int)cnodes]; + _last = _first + cnodes; + std::memset(_first, 0, cnodes*sizeof(pointer)); + } + else + { + std::runtime_error err("Unable to open file."); + } +#ifndef BOOST_NO_EXCEPTIONS + }catch(...) + { close(); throw; } +#endif +} + +void mapfile::close() +{ + if(hfile != 0) + { + pointer* p = _first; + while(p != _last) + { + if(*p) + delete[] *p; + ++p; + } + delete[] _first; + _size = 0; + _first = _last = 0; + std::fclose(hfile); + hfile = 0; + condemed.erase(condemed.begin(), condemed.end()); + } +} + + +#endif + +inline _fi_find_handle find_first_file(const char* wild, _fi_find_data& data) +{ +#ifdef BOOST_NO_ANSI_APIS + std::size_t wild_size = std::strlen(wild); + LPWSTR wide_wild = (LPWSTR)_alloca( (wild_size + 1) * sizeof(WCHAR) ); + if (::MultiByteToWideChar(CP_ACP, 0, wild, wild_size, wide_wild, wild_size + 1) == 0) + return _fi_invalid_handle; + + return FindFirstFileW(wide_wild, &data); +#else + return FindFirstFileA(wild, &data); +#endif +} + +inline bool find_next_file(_fi_find_handle hf, _fi_find_data& data) +{ +#ifdef BOOST_NO_ANSI_APIS + return FindNextFileW(hf, &data); +#else + return FindNextFileA(hf, &data); +#endif +} + +inline void copy_find_file_result_with_overflow_check(const _fi_find_data& data, char* path, size_t max_size) +{ +#ifdef BOOST_NO_ANSI_APIS + if (::WideCharToMultiByte(CP_ACP, 0, data.cFileName, -1, path, max_size, NULL, NULL) == 0) + re_detail::overflow_error_if_not_zero(1); +#else + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(path, max_size, data.cFileName)); +#endif +} + +inline bool is_not_current_or_parent_path_string(const _fi_find_data& data) +{ +#ifdef BOOST_NO_ANSI_APIS + return (std::wcscmp(data.cFileName, L".") && std::wcscmp(data.cFileName, L"..")); +#else + return (std::strcmp(data.cFileName, ".") && std::strcmp(data.cFileName, "..")); +#endif +} + + +file_iterator::file_iterator() +{ + _root = _path = 0; + ref = 0; +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + _root = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_root) + _path = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_path) + ptr = _path; + *_path = 0; + *_root = 0; + ref = new file_iterator_ref(); + BOOST_REGEX_NOEH_ASSERT(ref) + ref->hf = _fi_invalid_handle; + ref->count = 1; +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + delete[] _root; + delete[] _path; + delete ref; + throw; + } +#endif +} + +file_iterator::file_iterator(const char* wild) +{ + _root = _path = 0; + ref = 0; +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + _root = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_root) + _path = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_path) + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_root, MAX_PATH, wild)); + ptr = _root; + while(*ptr)++ptr; + while((ptr > _root) && (*ptr != *_fi_sep) && (*ptr != *_fi_sep_alt))--ptr; + if((ptr == _root) && ( (*ptr== *_fi_sep) || (*ptr==*_fi_sep_alt) ) ) + { + _root[1]='\0'; + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_path, MAX_PATH, _root)); + } + else + { + *ptr = 0; + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_path, MAX_PATH, _root)); + if(*_path == 0) + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_path, MAX_PATH, ".")); + re_detail::overflow_error_if_not_zero(re_detail::strcat_s(_path, MAX_PATH, _fi_sep)); + } + ptr = _path + std::strlen(_path); + + ref = new file_iterator_ref(); + BOOST_REGEX_NOEH_ASSERT(ref) + ref->hf = find_first_file(wild, ref->_data); + ref->count = 1; + + if(ref->hf == _fi_invalid_handle) + { + *_path = 0; + ptr = _path; + } + else + { + copy_find_file_result_with_overflow_check(ref->_data, ptr, (MAX_PATH - (ptr - _path))); + if(ref->_data.dwFileAttributes & _fi_dir) + next(); + } +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + delete[] _root; + delete[] _path; + delete ref; + throw; + } +#endif +} + +file_iterator::file_iterator(const file_iterator& other) +{ + _root = _path = 0; + ref = 0; +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + _root = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_root) + _path = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_path) + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_root, MAX_PATH, other._root)); + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_path, MAX_PATH, other._path)); + ptr = _path + (other.ptr - other._path); + ref = other.ref; +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + delete[] _root; + delete[] _path; + throw; + } +#endif + ++(ref->count); +} + +file_iterator& file_iterator::operator=(const file_iterator& other) +{ + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_root, MAX_PATH, other._root)); + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_path, MAX_PATH, other._path)); + ptr = _path + (other.ptr - other._path); + if(--(ref->count) == 0) + { + if(ref->hf != _fi_invalid_handle) + FindClose(ref->hf); + delete ref; + } + ref = other.ref; + ++(ref->count); + return *this; +} + + +file_iterator::~file_iterator() +{ + delete[] _root; + delete[] _path; + if(--(ref->count) == 0) + { + if(ref->hf != _fi_invalid_handle) + FindClose(ref->hf); + delete ref; + } +} + +file_iterator file_iterator::operator++(int) +{ + file_iterator temp(*this); + next(); + return temp; +} + + +void file_iterator::next() +{ + if(ref->hf != _fi_invalid_handle) + { + bool cont = true; + while(cont) + { + cont = find_next_file(ref->hf, ref->_data); + if(cont && ((ref->_data.dwFileAttributes & _fi_dir) == 0)) + break; + } + if(!cont) + { + // end of sequence + FindClose(ref->hf); + ref->hf = _fi_invalid_handle; + *_path = 0; + ptr = _path; + } + else + copy_find_file_result_with_overflow_check(ref->_data, ptr, MAX_PATH - (ptr - _path)); + } +} + + + +directory_iterator::directory_iterator() +{ + _root = _path = 0; + ref = 0; +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + _root = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_root) + _path = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_path) + ptr = _path; + *_path = 0; + *_root = 0; + ref = new file_iterator_ref(); + BOOST_REGEX_NOEH_ASSERT(ref) + ref->hf = _fi_invalid_handle; + ref->count = 1; +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + delete[] _root; + delete[] _path; + delete ref; + throw; + } +#endif +} + +directory_iterator::directory_iterator(const char* wild) +{ + _root = _path = 0; + ref = 0; +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + _root = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_root) + _path = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_path) + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_root, MAX_PATH, wild)); + ptr = _root; + while(*ptr)++ptr; + while((ptr > _root) && (*ptr != *_fi_sep) && (*ptr != *_fi_sep_alt))--ptr; + + if((ptr == _root) && ( (*ptr== *_fi_sep) || (*ptr==*_fi_sep_alt) ) ) + { + _root[1]='\0'; + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_path, MAX_PATH, _root)); + } + else + { + *ptr = 0; + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_path, MAX_PATH, _root)); + if(*_path == 0) + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_path, MAX_PATH, ".")); + re_detail::overflow_error_if_not_zero(re_detail::strcat_s(_path, MAX_PATH, _fi_sep)); + } + ptr = _path + std::strlen(_path); + + ref = new file_iterator_ref(); + BOOST_REGEX_NOEH_ASSERT(ref) + ref->count = 1; + ref->hf = find_first_file(wild, ref->_data); + if(ref->hf == _fi_invalid_handle) + { + *_path = 0; + ptr = _path; + } + else + { + copy_find_file_result_with_overflow_check(ref->_data, ptr, MAX_PATH - (ptr - _path)); + if(((ref->_data.dwFileAttributes & _fi_dir) == 0) || (std::strcmp(ptr, ".") == 0) || (std::strcmp(ptr, "..") == 0)) + next(); + } +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + delete[] _root; + delete[] _path; + delete ref; + throw; + } +#endif +} + +directory_iterator::~directory_iterator() +{ + delete[] _root; + delete[] _path; + if(--(ref->count) == 0) + { + if(ref->hf != _fi_invalid_handle) + FindClose(ref->hf); + delete ref; + } +} + +directory_iterator::directory_iterator(const directory_iterator& other) +{ + _root = _path = 0; + ref = 0; +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + _root = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_root) + _path = new char[MAX_PATH]; + BOOST_REGEX_NOEH_ASSERT(_path) + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_root, MAX_PATH, other._root)); + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_path, MAX_PATH, other._path)); + ptr = _path + (other.ptr - other._path); + ref = other.ref; +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + delete[] _root; + delete[] _path; + throw; + } +#endif + ++(ref->count); +} + +directory_iterator& directory_iterator::operator=(const directory_iterator& other) +{ + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_root, MAX_PATH, other._root)); + re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(_path, MAX_PATH, other._path)); + ptr = _path + (other.ptr - other._path); + if(--(ref->count) == 0) + { + if(ref->hf != _fi_invalid_handle) + FindClose(ref->hf); + delete ref; + } + ref = other.ref; + ++(ref->count); + return *this; +} + +directory_iterator directory_iterator::operator++(int) +{ + directory_iterator temp(*this); + next(); + return temp; +} + +void directory_iterator::next() +{ + if(ref->hf != _fi_invalid_handle) + { + bool cont = true; + while(cont) + { + cont = find_next_file(ref->hf, ref->_data); + if(cont && (ref->_data.dwFileAttributes & _fi_dir)) + { + if(is_not_current_or_parent_path_string(ref->_data)) + break; + } + } + if(!cont) + { + // end of sequence + FindClose(ref->hf); + ref->hf = _fi_invalid_handle; + *_path = 0; + ptr = _path; + } + else + copy_find_file_result_with_overflow_check(ref->_data, ptr, MAX_PATH - (ptr - _path)); + } +} + + +#ifdef BOOST_REGEX_FI_POSIX_DIR + +struct _fi_priv_data +{ + char root[MAX_PATH]; + char* mask; + DIR* d; + _fi_priv_data(const char* p); +}; + +_fi_priv_data::_fi_priv_data(const char* p) +{ + std::strcpy(root, p); + mask = root; + while(*mask) ++mask; + while((mask > root) && (*mask != *_fi_sep) && (*mask != *_fi_sep_alt)) --mask; + if(mask == root && ((*mask== *_fi_sep) || (*mask == *_fi_sep_alt)) ) + { + root[1] = '\0'; + std::strcpy(root+2, p+1); + mask = root+2; + } + else if(mask == root) + { + root[0] = '.'; + root[1] = '\0'; + std::strcpy(root+2, p); + mask = root+2; + } + else + { + *mask = 0; + ++mask; + } +} + +bool iswild(const char* mask, const char* name) +{ + while(*mask && *name) + { + switch(*mask) + { + case '?': + ++name; + ++mask; + continue; + case '*': + ++mask; + if(*mask == 0) + return true; + while(*name) + { + if(iswild(mask, name)) + return true; + ++name; + } + return false; + case '.': + if(0 == *name) + { + ++mask; + continue; + } + // fall through: + default: + if(BOOST_REGEX_FI_TRANSLATE(*mask) != BOOST_REGEX_FI_TRANSLATE(*name)) + return false; + ++mask; + ++name; + continue; + } + } + if(*mask != *name) + return false; + return true; +} + +unsigned _fi_attributes(const char* root, const char* name) +{ + char buf[MAX_PATH]; + if( ( (root[0] == *_fi_sep) || (root[0] == *_fi_sep_alt) ) && (root[1] == '\0') ) + (std::sprintf)(buf, "%s%s", root, name); + else + (std::sprintf)(buf, "%s%s%s", root, _fi_sep, name); + DIR* d = opendir(buf); + if(d) + { + closedir(d); + return _fi_dir; + } + return 0; +} + +_fi_find_handle _fi_FindFirstFile(const char* lpFileName, _fi_find_data* lpFindFileData) +{ + _fi_find_handle dat = new _fi_priv_data(lpFileName); + + DIR* h = opendir(dat->root); + dat->d = h; + if(h != 0) + { + if(_fi_FindNextFile(dat, lpFindFileData)) + return dat; + } + delete dat; + return 0; +} + +bool _fi_FindNextFile(_fi_find_handle dat, _fi_find_data* lpFindFileData) +{ + dirent* d; + do + { + d = readdir(dat->d); + } while(d && !iswild(dat->mask, d->d_name)); + + if(d) + { + std::strcpy(lpFindFileData->cFileName, d->d_name); + lpFindFileData->dwFileAttributes = _fi_attributes(dat->root, d->d_name); + return true; + } + return false; +} + +bool _fi_FindClose(_fi_find_handle dat) +{ + closedir(dat->d); + delete dat; + return true; +} + +#endif + +} // namespace re_detail +} // namspace boost + +#endif // BOOST_REGEX_NO_FILEITER + + + + + + + + + + + + diff --git a/cutl/details/boost/regex/src/icu.cxx b/cutl/details/boost/regex/src/icu.cxx new file mode 100644 index 0000000..982b134 --- /dev/null +++ b/cutl/details/boost/regex/src/icu.cxx @@ -0,0 +1,507 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE icu.cpp + * VERSION see + * DESCRIPTION: Unicode regular expressions on top of the ICU Library. + */ +#define BOOST_REGEX_SOURCE + +#include +#ifdef BOOST_HAS_ICU +#define BOOST_REGEX_ICU_INSTANTIATE +#include + +#ifdef BOOST_INTEL +#pragma warning(disable:981 2259 383) +#endif + +namespace cutl_details_boost{ + +namespace re_detail{ + +icu_regex_traits_implementation::string_type icu_regex_traits_implementation::do_transform(const char_type* p1, const char_type* p2, const U_NAMESPACE_QUALIFIER Collator* pcoll) const +{ + // TODO make thread safe!!!! : + typedef u32_to_u16_iterator itt; + itt i(p1), j(p2); +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + std::vector< ::UChar> t(i, j); +#else + std::vector< ::UChar> t; + while(i != j) + t.push_back(*i++); +#endif + ::uint8_t result[100]; + ::int32_t len; + if(t.size()) + len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), result, sizeof(result)); + else + len = pcoll->getSortKey(static_cast(0), static_cast< ::int32_t>(0), result, sizeof(result)); + if(std::size_t(len) > sizeof(result)) + { + scoped_array< ::uint8_t> presult(new ::uint8_t[len+1]); + if(t.size()) + len = pcoll->getSortKey(&*t.begin(), static_cast< ::int32_t>(t.size()), presult.get(), len+1); + else + len = pcoll->getSortKey(static_cast(0), static_cast< ::int32_t>(0), presult.get(), len+1); + if((0 == presult[len-1]) && (len > 1)) + --len; +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + return string_type(presult.get(), presult.get()+len); +#else + string_type sresult; + ::uint8_t const* ia = presult.get(); + ::uint8_t const* ib = presult.get()+len; + while(ia != ib) + sresult.push_back(*ia++); + return sresult; +#endif + } + if((0 == result[len-1]) && (len > 1)) + --len; +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + return string_type(result, result+len); +#else + string_type sresult; + ::uint8_t const* ia = result; + ::uint8_t const* ib = result+len; + while(ia != ib) + sresult.push_back(*ia++); + return sresult; +#endif +} + +} + +icu_regex_traits::size_type icu_regex_traits::length(const char_type* p) +{ + size_type result = 0; + while(*p) + { + ++p; + ++result; + } + return result; +} + +// +// define our bitmasks: +// +const icu_regex_traits::char_class_type icu_regex_traits::mask_blank = icu_regex_traits::char_class_type(1) << offset_blank; +const icu_regex_traits::char_class_type icu_regex_traits::mask_space = icu_regex_traits::char_class_type(1) << offset_space; +const icu_regex_traits::char_class_type icu_regex_traits::mask_xdigit = icu_regex_traits::char_class_type(1) << offset_xdigit; +const icu_regex_traits::char_class_type icu_regex_traits::mask_underscore = icu_regex_traits::char_class_type(1) << offset_underscore; +const icu_regex_traits::char_class_type icu_regex_traits::mask_unicode = icu_regex_traits::char_class_type(1) << offset_unicode; +const icu_regex_traits::char_class_type icu_regex_traits::mask_any = icu_regex_traits::char_class_type(1) << offset_any; +const icu_regex_traits::char_class_type icu_regex_traits::mask_ascii = icu_regex_traits::char_class_type(1) << offset_ascii; +const icu_regex_traits::char_class_type icu_regex_traits::mask_horizontal = icu_regex_traits::char_class_type(1) << offset_horizontal; +const icu_regex_traits::char_class_type icu_regex_traits::mask_vertical = icu_regex_traits::char_class_type(1) << offset_vertical; + +icu_regex_traits::char_class_type icu_regex_traits::lookup_icu_mask(const ::UChar32* p1, const ::UChar32* p2) +{ + static const ::UChar32 prop_name_table[] = { + /* any */ 'a', 'n', 'y', + /* ascii */ 'a', 's', 'c', 'i', 'i', + /* assigned */ 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', + /* c* */ 'c', '*', + /* cc */ 'c', 'c', + /* cf */ 'c', 'f', + /* closepunctuation */ 'c', 'l', 'o', 's', 'e', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* cn */ 'c', 'n', + /* co */ 'c', 'o', + /* connectorpunctuation */ 'c', 'o', 'n', 'n', 'e', 'c', 't', 'o', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* control */ 'c', 'o', 'n', 't', 'r', 'o', 'l', + /* cs */ 'c', 's', + /* currencysymbol */ 'c', 'u', 'r', 'r', 'e', 'n', 'c', 'y', 's', 'y', 'm', 'b', 'o', 'l', + /* dashpunctuation */ 'd', 'a', 's', 'h', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* decimaldigitnumber */ 'd', 'e', 'c', 'i', 'm', 'a', 'l', 'd', 'i', 'g', 'i', 't', 'n', 'u', 'm', 'b', 'e', 'r', + /* enclosingmark */ 'e', 'n', 'c', 'l', 'o', 's', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* finalpunctuation */ 'f', 'i', 'n', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* format */ 'f', 'o', 'r', 'm', 'a', 't', + /* initialpunctuation */ 'i', 'n', 'i', 't', 'i', 'a', 'l', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* l* */ 'l', '*', + /* letter */ 'l', 'e', 't', 't', 'e', 'r', + /* letternumber */ 'l', 'e', 't', 't', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', + /* lineseparator */ 'l', 'i', 'n', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* ll */ 'l', 'l', + /* lm */ 'l', 'm', + /* lo */ 'l', 'o', + /* lowercaseletter */ 'l', 'o', 'w', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* lt */ 'l', 't', + /* lu */ 'l', 'u', + /* m* */ 'm', '*', + /* mark */ 'm', 'a', 'r', 'k', + /* mathsymbol */ 'm', 'a', 't', 'h', 's', 'y', 'm', 'b', 'o', 'l', + /* mc */ 'm', 'c', + /* me */ 'm', 'e', + /* mn */ 'm', 'n', + /* modifierletter */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', + /* modifiersymbol */ 'm', 'o', 'd', 'i', 'f', 'i', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', + /* n* */ 'n', '*', + /* nd */ 'n', 'd', + /* nl */ 'n', 'l', + /* no */ 'n', 'o', + /* nonspacingmark */ 'n', 'o', 'n', 's', 'p', 'a', 'c', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* notassigned */ 'n', 'o', 't', 'a', 's', 's', 'i', 'g', 'n', 'e', 'd', + /* number */ 'n', 'u', 'm', 'b', 'e', 'r', + /* openpunctuation */ 'o', 'p', 'e', 'n', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* other */ 'o', 't', 'h', 'e', 'r', + /* otherletter */ 'o', 't', 'h', 'e', 'r', 'l', 'e', 't', 't', 'e', 'r', + /* othernumber */ 'o', 't', 'h', 'e', 'r', 'n', 'u', 'm', 'b', 'e', 'r', + /* otherpunctuation */ 'o', 't', 'h', 'e', 'r', 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* othersymbol */ 'o', 't', 'h', 'e', 'r', 's', 'y', 'm', 'b', 'o', 'l', + /* p* */ 'p', '*', + /* paragraphseparator */ 'p', 'a', 'r', 'a', 'g', 'r', 'a', 'p', 'h', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* pc */ 'p', 'c', + /* pd */ 'p', 'd', + /* pe */ 'p', 'e', + /* pf */ 'p', 'f', + /* pi */ 'p', 'i', + /* po */ 'p', 'o', + /* privateuse */ 'p', 'r', 'i', 'v', 'a', 't', 'e', 'u', 's', 'e', + /* ps */ 'p', 's', + /* punctuation */ 'p', 'u', 'n', 'c', 't', 'u', 'a', 't', 'i', 'o', 'n', + /* s* */ 's', '*', + /* sc */ 's', 'c', + /* separator */ 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* sk */ 's', 'k', + /* sm */ 's', 'm', + /* so */ 's', 'o', + /* spaceseparator */ 's', 'p', 'a', 'c', 'e', 's', 'e', 'p', 'a', 'r', 'a', 't', 'o', 'r', + /* spacingcombiningmark */ 's', 'p', 'a', 'c', 'i', 'n', 'g', 'c', 'o', 'm', 'b', 'i', 'n', 'i', 'n', 'g', 'm', 'a', 'r', 'k', + /* surrogate */ 's', 'u', 'r', 'r', 'o', 'g', 'a', 't', 'e', + /* symbol */ 's', 'y', 'm', 'b', 'o', 'l', + /* titlecase */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', + /* titlecaseletter */ 't', 'i', 't', 'l', 'e', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* uppercaseletter */ 'u', 'p', 'p', 'e', 'r', 'c', 'a', 's', 'e', 'l', 'e', 't', 't', 'e', 'r', + /* z* */ 'z', '*', + /* zl */ 'z', 'l', + /* zp */ 'z', 'p', + /* zs */ 'z', 's', + }; + + static const re_detail::character_pointer_range< ::UChar32> range_data[] = { + { prop_name_table+0, prop_name_table+3, }, // any + { prop_name_table+3, prop_name_table+8, }, // ascii + { prop_name_table+8, prop_name_table+16, }, // assigned + { prop_name_table+16, prop_name_table+18, }, // c* + { prop_name_table+18, prop_name_table+20, }, // cc + { prop_name_table+20, prop_name_table+22, }, // cf + { prop_name_table+22, prop_name_table+38, }, // closepunctuation + { prop_name_table+38, prop_name_table+40, }, // cn + { prop_name_table+40, prop_name_table+42, }, // co + { prop_name_table+42, prop_name_table+62, }, // connectorpunctuation + { prop_name_table+62, prop_name_table+69, }, // control + { prop_name_table+69, prop_name_table+71, }, // cs + { prop_name_table+71, prop_name_table+85, }, // currencysymbol + { prop_name_table+85, prop_name_table+100, }, // dashpunctuation + { prop_name_table+100, prop_name_table+118, }, // decimaldigitnumber + { prop_name_table+118, prop_name_table+131, }, // enclosingmark + { prop_name_table+131, prop_name_table+147, }, // finalpunctuation + { prop_name_table+147, prop_name_table+153, }, // format + { prop_name_table+153, prop_name_table+171, }, // initialpunctuation + { prop_name_table+171, prop_name_table+173, }, // l* + { prop_name_table+173, prop_name_table+179, }, // letter + { prop_name_table+179, prop_name_table+191, }, // letternumber + { prop_name_table+191, prop_name_table+204, }, // lineseparator + { prop_name_table+204, prop_name_table+206, }, // ll + { prop_name_table+206, prop_name_table+208, }, // lm + { prop_name_table+208, prop_name_table+210, }, // lo + { prop_name_table+210, prop_name_table+225, }, // lowercaseletter + { prop_name_table+225, prop_name_table+227, }, // lt + { prop_name_table+227, prop_name_table+229, }, // lu + { prop_name_table+229, prop_name_table+231, }, // m* + { prop_name_table+231, prop_name_table+235, }, // mark + { prop_name_table+235, prop_name_table+245, }, // mathsymbol + { prop_name_table+245, prop_name_table+247, }, // mc + { prop_name_table+247, prop_name_table+249, }, // me + { prop_name_table+249, prop_name_table+251, }, // mn + { prop_name_table+251, prop_name_table+265, }, // modifierletter + { prop_name_table+265, prop_name_table+279, }, // modifiersymbol + { prop_name_table+279, prop_name_table+281, }, // n* + { prop_name_table+281, prop_name_table+283, }, // nd + { prop_name_table+283, prop_name_table+285, }, // nl + { prop_name_table+285, prop_name_table+287, }, // no + { prop_name_table+287, prop_name_table+301, }, // nonspacingmark + { prop_name_table+301, prop_name_table+312, }, // notassigned + { prop_name_table+312, prop_name_table+318, }, // number + { prop_name_table+318, prop_name_table+333, }, // openpunctuation + { prop_name_table+333, prop_name_table+338, }, // other + { prop_name_table+338, prop_name_table+349, }, // otherletter + { prop_name_table+349, prop_name_table+360, }, // othernumber + { prop_name_table+360, prop_name_table+376, }, // otherpunctuation + { prop_name_table+376, prop_name_table+387, }, // othersymbol + { prop_name_table+387, prop_name_table+389, }, // p* + { prop_name_table+389, prop_name_table+407, }, // paragraphseparator + { prop_name_table+407, prop_name_table+409, }, // pc + { prop_name_table+409, prop_name_table+411, }, // pd + { prop_name_table+411, prop_name_table+413, }, // pe + { prop_name_table+413, prop_name_table+415, }, // pf + { prop_name_table+415, prop_name_table+417, }, // pi + { prop_name_table+417, prop_name_table+419, }, // po + { prop_name_table+419, prop_name_table+429, }, // privateuse + { prop_name_table+429, prop_name_table+431, }, // ps + { prop_name_table+431, prop_name_table+442, }, // punctuation + { prop_name_table+442, prop_name_table+444, }, // s* + { prop_name_table+444, prop_name_table+446, }, // sc + { prop_name_table+446, prop_name_table+455, }, // separator + { prop_name_table+455, prop_name_table+457, }, // sk + { prop_name_table+457, prop_name_table+459, }, // sm + { prop_name_table+459, prop_name_table+461, }, // so + { prop_name_table+461, prop_name_table+475, }, // spaceseparator + { prop_name_table+475, prop_name_table+495, }, // spacingcombiningmark + { prop_name_table+495, prop_name_table+504, }, // surrogate + { prop_name_table+504, prop_name_table+510, }, // symbol + { prop_name_table+510, prop_name_table+519, }, // titlecase + { prop_name_table+519, prop_name_table+534, }, // titlecaseletter + { prop_name_table+534, prop_name_table+549, }, // uppercaseletter + { prop_name_table+549, prop_name_table+551, }, // z* + { prop_name_table+551, prop_name_table+553, }, // zl + { prop_name_table+553, prop_name_table+555, }, // zp + { prop_name_table+555, prop_name_table+557, }, // zs + }; + + static const icu_regex_traits::char_class_type icu_class_map[] = { + icu_regex_traits::mask_any, // any + icu_regex_traits::mask_ascii, // ascii + (0x3FFFFFFFu) & ~(U_GC_CN_MASK), // assigned + U_GC_C_MASK, // c* + U_GC_CC_MASK, // cc + U_GC_CF_MASK, // cf + U_GC_PE_MASK, // closepunctuation + U_GC_CN_MASK, // cn + U_GC_CO_MASK, // co + U_GC_PC_MASK, // connectorpunctuation + U_GC_CC_MASK, // control + U_GC_CS_MASK, // cs + U_GC_SC_MASK, // currencysymbol + U_GC_PD_MASK, // dashpunctuation + U_GC_ND_MASK, // decimaldigitnumber + U_GC_ME_MASK, // enclosingmark + U_GC_PF_MASK, // finalpunctuation + U_GC_CF_MASK, // format + U_GC_PI_MASK, // initialpunctuation + U_GC_L_MASK, // l* + U_GC_L_MASK, // letter + U_GC_NL_MASK, // letternumber + U_GC_ZL_MASK, // lineseparator + U_GC_LL_MASK, // ll + U_GC_LM_MASK, // lm + U_GC_LO_MASK, // lo + U_GC_LL_MASK, // lowercaseletter + U_GC_LT_MASK, // lt + U_GC_LU_MASK, // lu + U_GC_M_MASK, // m* + U_GC_M_MASK, // mark + U_GC_SM_MASK, // mathsymbol + U_GC_MC_MASK, // mc + U_GC_ME_MASK, // me + U_GC_MN_MASK, // mn + U_GC_LM_MASK, // modifierletter + U_GC_SK_MASK, // modifiersymbol + U_GC_N_MASK, // n* + U_GC_ND_MASK, // nd + U_GC_NL_MASK, // nl + U_GC_NO_MASK, // no + U_GC_MN_MASK, // nonspacingmark + U_GC_CN_MASK, // notassigned + U_GC_N_MASK, // number + U_GC_PS_MASK, // openpunctuation + U_GC_C_MASK, // other + U_GC_LO_MASK, // otherletter + U_GC_NO_MASK, // othernumber + U_GC_PO_MASK, // otherpunctuation + U_GC_SO_MASK, // othersymbol + U_GC_P_MASK, // p* + U_GC_ZP_MASK, // paragraphseparator + U_GC_PC_MASK, // pc + U_GC_PD_MASK, // pd + U_GC_PE_MASK, // pe + U_GC_PF_MASK, // pf + U_GC_PI_MASK, // pi + U_GC_PO_MASK, // po + U_GC_CO_MASK, // privateuse + U_GC_PS_MASK, // ps + U_GC_P_MASK, // punctuation + U_GC_S_MASK, // s* + U_GC_SC_MASK, // sc + U_GC_Z_MASK, // separator + U_GC_SK_MASK, // sk + U_GC_SM_MASK, // sm + U_GC_SO_MASK, // so + U_GC_ZS_MASK, // spaceseparator + U_GC_MC_MASK, // spacingcombiningmark + U_GC_CS_MASK, // surrogate + U_GC_S_MASK, // symbol + U_GC_LT_MASK, // titlecase + U_GC_LT_MASK, // titlecaseletter + U_GC_LU_MASK, // uppercaseletter + U_GC_Z_MASK, // z* + U_GC_ZL_MASK, // zl + U_GC_ZP_MASK, // zp + U_GC_ZS_MASK, // zs + }; + + + static const re_detail::character_pointer_range< ::UChar32>* ranges_begin = range_data; + static const re_detail::character_pointer_range< ::UChar32>* ranges_end = range_data + (sizeof(range_data)/sizeof(range_data[0])); + + re_detail::character_pointer_range< ::UChar32> t = { p1, p2, }; + const re_detail::character_pointer_range< ::UChar32>* p = std::lower_bound(ranges_begin, ranges_end, t); + if((p != ranges_end) && (t == *p)) + return icu_class_map[p - ranges_begin]; + return 0; +} + +icu_regex_traits::char_class_type icu_regex_traits::lookup_classname(const char_type* p1, const char_type* p2) const +{ + static const char_class_type masks[] = + { + 0, + U_GC_L_MASK | U_GC_ND_MASK, + U_GC_L_MASK, + mask_blank, + U_GC_CC_MASK | U_GC_CF_MASK | U_GC_ZL_MASK | U_GC_ZP_MASK, + U_GC_ND_MASK, + U_GC_ND_MASK, + (0x3FFFFFFFu) & ~(U_GC_CC_MASK | U_GC_CF_MASK | U_GC_CS_MASK | U_GC_CN_MASK | U_GC_Z_MASK), + mask_horizontal, + U_GC_LL_MASK, + U_GC_LL_MASK, + ~(U_GC_C_MASK), + U_GC_P_MASK, + char_class_type(U_GC_Z_MASK) | mask_space, + char_class_type(U_GC_Z_MASK) | mask_space, + U_GC_LU_MASK, + mask_unicode, + U_GC_LU_MASK, + mask_vertical, + char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, + char_class_type(U_GC_L_MASK | U_GC_ND_MASK | U_GC_MN_MASK) | mask_underscore, + char_class_type(U_GC_ND_MASK) | mask_xdigit, + }; + + int idx = ::cutl_details_boost::re_detail::get_default_class_id(p1, p2); + if(idx >= 0) + return masks[idx+1]; + char_class_type result = lookup_icu_mask(p1, p2); + if(result != 0) + return result; + + if(idx < 0) + { + string_type s(p1, p2); + string_type::size_type i = 0; + while(i < s.size()) + { + s[i] = static_cast((::u_tolower)(s[i])); + if(::u_isspace(s[i]) || (s[i] == '-') || (s[i] == '_')) + s.erase(s.begin()+i, s.begin()+i+1); + else + { + s[i] = static_cast((::u_tolower)(s[i])); + ++i; + } + } + if(s.size()) + idx = ::cutl_details_boost::re_detail::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + if(idx >= 0) + return masks[idx+1]; + if(s.size()) + result = lookup_icu_mask(&*s.begin(), &*s.begin() + s.size()); + if(result != 0) + return result; + } + BOOST_ASSERT(std::size_t(idx+1) < sizeof(masks) / sizeof(masks[0])); + return masks[idx+1]; +} + +icu_regex_traits::string_type icu_regex_traits::lookup_collatename(const char_type* p1, const char_type* p2) const +{ + string_type result; + if(std::find_if(p1, p2, std::bind2nd(std::greater< ::UChar32>(), 0x7f)) == p2) + { +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + std::string s(p1, p2); +#else + std::string s; + const char_type* p3 = p1; + while(p3 != p2) + s.append(1, *p3++); +#endif + // Try Unicode name: + UErrorCode err = U_ZERO_ERROR; + UChar32 c = ::u_charFromName(U_UNICODE_CHAR_NAME, s.c_str(), &err); + if(U_SUCCESS(err)) + { + result.push_back(c); + return result; + } + // Try Unicode-extended name: + err = U_ZERO_ERROR; + c = ::u_charFromName(U_EXTENDED_CHAR_NAME, s.c_str(), &err); + if(U_SUCCESS(err)) + { + result.push_back(c); + return result; + } + // try POSIX name: + s = ::cutl_details_boost::re_detail::lookup_default_collate_name(s); +#ifndef BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS + result.assign(s.begin(), s.end()); +#else + result.clear(); + std::string::const_iterator si, sj; + si = s.begin(); + sj = s.end(); + while(si != sj) + result.push_back(*si++); +#endif + } + if(result.empty() && (p2-p1 == 1)) + result.push_back(*p1); + return result; +} + +bool icu_regex_traits::isctype(char_type c, char_class_type f) const +{ + // check for standard catagories first: + char_class_type m = char_class_type(1u << u_charType(c)); + if((m & f) != 0) + return true; + // now check for special cases: + if(((f & mask_blank) != 0) && u_isblank(c)) + return true; + if(((f & mask_space) != 0) && u_isspace(c)) + return true; + if(((f & mask_xdigit) != 0) && (u_digit(c, 16) >= 0)) + return true; + if(((f & mask_unicode) != 0) && (c >= 0x100)) + return true; + if(((f & mask_underscore) != 0) && (c == '_')) + return true; + if(((f & mask_any) != 0) && (c <= 0x10FFFF)) + return true; + if(((f & mask_ascii) != 0) && (c <= 0x7F)) + return true; + if(((f & mask_vertical) != 0) && (::cutl_details_boost::re_detail::is_separator(c) || (c == static_cast('\v')) || (m == U_GC_ZL_MASK) || (m == U_GC_ZP_MASK))) + return true; + if(((f & mask_horizontal) != 0) && !::cutl_details_boost::re_detail::is_separator(c) && u_isspace(c) && (c != static_cast('\v'))) + return true; + return false; +} + +} + +#endif // BOOST_HAS_ICU diff --git a/cutl/details/boost/regex/src/instances.cxx b/cutl/details/boost/regex/src/instances.cxx new file mode 100644 index 0000000..02d3f36 --- /dev/null +++ b/cutl/details/boost/regex/src/instances.cxx @@ -0,0 +1,32 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: instances.cpp + * VERSION: see + * DESCRIPTION: regex narrow character template instances. + */ + +#define BOOST_REGEX_SOURCE + +#include + +#if !defined(BOOST_REGEX_NO_EXTERNAL_TEMPLATES) +#define BOOST_REGEX_NARROW_INSTANTIATE + +#ifdef __BORLANDC__ +#pragma hrdstop +#endif + +#include + +#endif diff --git a/cutl/details/boost/regex/src/posix_api.cxx b/cutl/details/boost/regex/src/posix_api.cxx new file mode 100644 index 0000000..589a235 --- /dev/null +++ b/cutl/details/boost/regex/src/posix_api.cxx @@ -0,0 +1,289 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: posix_api.cpp + * VERSION: see + * DESCRIPTION: Implements the Posix API wrappers. + */ + +#define BOOST_REGEX_SOURCE + +#include +#include +#include + +#if defined(BOOST_NO_STDC_NAMESPACE) +namespace std{ + using ::sprintf; + using ::strcpy; + using ::strcmp; +} +#endif + + +namespace cutl_details_boost{ + +namespace{ + +unsigned int magic_value = 25631; + +const char* names[] = { + "REG_NOERROR", + "REG_NOMATCH", + "REG_BADPAT", + "REG_ECOLLATE", + "REG_ECTYPE", + "REG_EESCAPE", + "REG_ESUBREG", + "REG_EBRACK", + "REG_EPAREN", + "REG_EBRACE", + "REG_BADBR", + "REG_ERANGE", + "REG_ESPACE", + "REG_BADRPT", + "REG_EEND", + "REG_ESIZE", + "REG_ERPAREN", + "REG_EMPTY", + "REG_ECOMPLEXITY", + "REG_ESTACK", + "REG_E_PERL", + "REG_E_UNKNOWN", +}; +} // namespace + +typedef cutl_details_boost::basic_regex > c_regex_type; + +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompA(regex_tA* expression, const char* ptr, int f) +{ + if(expression->re_magic != magic_value) + { + expression->guts = 0; +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + expression->guts = new c_regex_type(); +#ifndef BOOST_NO_EXCEPTIONS + } catch(...) + { + return REG_ESPACE; + } +#else + if(0 == expression->guts) + return REG_E_MEMORY; +#endif + } + // set default flags: + cutl_details_boost::uint_fast32_t flags = (f & REG_PERLEX) ? 0 : ((f & REG_EXTENDED) ? regex::extended : regex::basic); + expression->eflags = (f & REG_NEWLINE) ? match_not_dot_newline : match_default; + // and translate those that are actually set: + + if(f & REG_NOCOLLATE) + { + flags |= regex::nocollate; +#ifndef BOOST_REGEX_V3 + flags &= ~regex::collate; +#endif + } + + if(f & REG_NOSUB) + { + //expression->eflags |= match_any; + flags |= regex::nosubs; + } + + if(f & REG_NOSPEC) + flags |= regex::literal; + if(f & REG_ICASE) + flags |= regex::icase; + if(f & REG_ESCAPE_IN_LISTS) + flags &= ~regex::no_escape_in_lists; + if(f & REG_NEWLINE_ALT) + flags |= regex::newline_alt; + + const char* p2; + if(f & REG_PEND) + p2 = expression->re_endp; + else p2 = ptr + std::strlen(ptr); + + int result; + +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + expression->re_magic = magic_value; + static_cast(expression->guts)->set_expression(ptr, p2, flags); + expression->re_nsub = static_cast(expression->guts)->mark_count() - 1; + result = static_cast(expression->guts)->error_code(); +#ifndef BOOST_NO_EXCEPTIONS + } + catch(const cutl_details_boost::regex_error& be) + { + result = be.code(); + } + catch(...) + { + result = REG_E_UNKNOWN; + } +#endif + if(result) + regfreeA(expression); + return result; + +} + +BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorA(int code, const regex_tA* e, char* buf, regsize_t buf_size) +{ + std::size_t result = 0; + if(code & REG_ITOA) + { + code &= ~REG_ITOA; + if(code <= (int)REG_E_UNKNOWN) + { + result = std::strlen(names[code]) + 1; + if(buf_size >= result) + re_detail::strcpy_s(buf, buf_size, names[code]); + return result; + } + return result; + } + if(code == REG_ATOI) + { + char localbuf[5]; + if(e == 0) + return 0; + for(int i = 0; i <= (int)REG_E_UNKNOWN; ++i) + { + if(std::strcmp(e->re_endp, names[i]) == 0) + { +#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE) + (::sprintf_s)(localbuf, 5, "%d", i); +#else + (std::sprintf)(localbuf, "%d", i); +#endif + if(std::strlen(localbuf) < buf_size) + re_detail::strcpy_s(buf, buf_size, localbuf); + return std::strlen(localbuf) + 1; + } + } +#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE) + (::sprintf_s)(localbuf, 5, "%d", 0); +#else + (std::sprintf)(localbuf, "%d", 0); +#endif + if(std::strlen(localbuf) < buf_size) + re_detail::strcpy_s(buf, buf_size, localbuf); + return std::strlen(localbuf) + 1; + } + if(code <= (int)REG_E_UNKNOWN) + { + std::string p; + if((e) && (e->re_magic == magic_value)) + p = static_cast(e->guts)->get_traits().error_string(static_cast< ::cutl_details_boost::regex_constants::error_type>(code)); + else + { + p = re_detail::get_default_error_string(static_cast< ::cutl_details_boost::regex_constants::error_type>(code)); + } + std::size_t len = p.size(); + if(len < buf_size) + { + re_detail::strcpy_s(buf, buf_size, p.c_str()); + } + return len + 1; + } + if(buf_size) + *buf = 0; + return 0; +} + +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecA(const regex_tA* expression, const char* buf, regsize_t n, regmatch_t* array, int eflags) +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4267) +#endif + bool result = false; + match_flag_type flags = match_default | expression->eflags; + const char* end; + const char* start; + cmatch m; + + if(eflags & REG_NOTBOL) + flags |= match_not_bol; + if(eflags & REG_NOTEOL) + flags |= match_not_eol; + if(eflags & REG_STARTEND) + { + start = buf + array[0].rm_so; + end = buf + array[0].rm_eo; + } + else + { + start = buf; + end = buf + std::strlen(buf); + } + +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + if(expression->re_magic == magic_value) + { + result = regex_search(start, end, m, *static_cast(expression->guts), flags); + } + else + return result; +#ifndef BOOST_NO_EXCEPTIONS + } catch(...) + { + return REG_E_UNKNOWN; + } +#endif + + if(result) + { + // extract what matched: + std::size_t i; + for(i = 0; (i < n) && (i < expression->re_nsub + 1); ++i) + { + array[i].rm_so = (m[i].matched == false) ? -1 : (m[i].first - buf); + array[i].rm_eo = (m[i].matched == false) ? -1 : (m[i].second - buf); + } + // and set anything else to -1: + for(i = expression->re_nsub + 1; i < n; ++i) + { + array[i].rm_so = -1; + array[i].rm_eo = -1; + } + return 0; + } + return REG_NOMATCH; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeA(regex_tA* expression) +{ + if(expression->re_magic == magic_value) + { + delete static_cast(expression->guts); + } + expression->re_magic = 0; +} + +} // namespace cutl_details_boost + + + + diff --git a/cutl/details/boost/regex/src/regex.cxx b/cutl/details/boost/regex/src/regex.cxx new file mode 100644 index 0000000..0f3938e --- /dev/null +++ b/cutl/details/boost/regex/src/regex.cxx @@ -0,0 +1,226 @@ +/* + * + * Copyright (c) 1998-2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: regex.cpp + * VERSION: see + * DESCRIPTION: Misc cutl_details_boost::regbase member funnctions. + */ + + +#define BOOST_REGEX_SOURCE + +#include +#include +#include + +#if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && defined(_MSC_VER) && (_MSC_VER >= 1300) +# include +#endif +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +# define NOMINMAX +#endif +#define NOGDI +#define NOUSER +#include +#endif + +#if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_REGEX_V3) +#if BOOST_REGEX_MAX_CACHE_BLOCKS == 0 +#include +#else +#include +#endif +#endif + +#ifdef BOOST_INTEL +#pragma warning(disable:383) +#endif + +namespace cutl_details_boost{ + +// +// fix: these are declared out of line here to ensure +// that dll builds contain the Virtual table for these +// types - this ensures that exceptions can be thrown +// from the dll and caught in an exe. +regex_error::regex_error(const std::string& s, regex_constants::error_type err, std::ptrdiff_t pos) + : std::runtime_error(s) + , m_error_code(err) + , m_position(pos) +{ +} + +regex_error::regex_error(regex_constants::error_type err) + : std::runtime_error(::cutl_details_boost::re_detail::get_default_error_string(err)) + , m_error_code(err) + , m_position(0) +{ +} + +regex_error::~regex_error() throw() +{ +} + +void regex_error::raise()const +{ +#ifndef BOOST_NO_EXCEPTIONS + ::cutl_details_boost::throw_exception(*this); +#endif +} + + + +namespace re_detail{ + +BOOST_REGEX_DECL void BOOST_REGEX_CALL raise_runtime_error(const std::runtime_error& ex) +{ + ::cutl_details_boost::throw_exception(ex); +} +// +// error checking API: +// +BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(cutl_details_boost::regex::flag_type /*ef*/, match_flag_type mf) +{ +#ifndef BOOST_REGEX_V3 + // + // can't mix match_extra with POSIX matching rules: + // + if((mf & match_extra) && (mf & match_posix)) + { + std::logic_error msg("Usage Error: Can't mix regular expression captures with POSIX matching rules"); + throw_exception(msg); + } +#endif +} + +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD + +static void execute_eror() +{ + // we only get here after a stack overflow, + // this has to be a separate proceedure because we + // can't mix __try{}__except block with local objects + // that have destructors: + reset_stack_guard_page(); + std::runtime_error err("Out of stack space, while attempting to match a regular expression."); + raise_runtime_error(err); +} + +bool BOOST_REGEX_CALL abstract_protected_call::execute()const +{ + __try{ + return this->call(); + }__except(EXCEPTION_STACK_OVERFLOW == GetExceptionCode()) + { + execute_eror(); + } + // We never really get here at all: + return false; +} + +BOOST_REGEX_DECL void BOOST_REGEX_CALL reset_stack_guard_page() +{ +#if defined(BOOST_REGEX_HAS_MS_STACK_GUARD) && defined(_MSC_VER) && (_MSC_VER >= 1300) + _resetstkoflw(); +#else + // + // We need to locate the current page being used by the stack, + // move to the page below it and then deallocate and protect + // that page. Note that ideally we would protect only the lowest + // stack page that has been allocated: in practice there + // seems to be no easy way to locate this page, in any case as + // long as the next page is protected, then Windows will figure + // the rest out for us... + // + SYSTEM_INFO si; + GetSystemInfo(&si); + MEMORY_BASIC_INFORMATION mi; + DWORD previous_protection_status; + // + // this is an address in our stack space: + // + LPBYTE page = (LPBYTE)&page; + // + // Get the current memory page in use: + // + VirtualQuery(page, &mi, sizeof(mi)); + // + // Go to the page one below this: + // + page = (LPBYTE)(mi.BaseAddress)-si.dwPageSize; + // + // Free and protect everything from the start of the + // allocation range, to the end of the page below the + // one in use: + // + if (!VirtualFree(mi.AllocationBase, (LPBYTE)page - (LPBYTE)mi.AllocationBase, MEM_DECOMMIT) + || !VirtualProtect(page, si.dwPageSize, PAGE_GUARD | PAGE_READWRITE, &previous_protection_status)) + { + throw std::bad_exception(); + } +#endif +} +#endif + +#if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_REGEX_V3) + +#if BOOST_REGEX_MAX_CACHE_BLOCKS == 0 + +BOOST_REGEX_DECL void* BOOST_REGEX_CALL get_mem_block() +{ + return ::operator new(BOOST_REGEX_BLOCKSIZE); +} + +BOOST_REGEX_DECL void BOOST_REGEX_CALL put_mem_block(void* p) +{ + ::operator delete(p); +} + +#else + +#ifdef BOOST_HAS_THREADS +mem_block_cache block_cache = { 0, 0, BOOST_STATIC_MUTEX_INIT, }; +#else +mem_block_cache block_cache = { 0, 0, }; +#endif + +BOOST_REGEX_DECL void* BOOST_REGEX_CALL get_mem_block() +{ + return block_cache.get(); +} + +BOOST_REGEX_DECL void BOOST_REGEX_CALL put_mem_block(void* p) +{ + block_cache.put(p); +} + +#endif + +#endif + +} // namespace re_detail + + + +} // namespace cutl_details_boost + +#if defined(BOOST_RE_USE_VCL) && defined(BOOST_REGEX_DYN_LINK) + +int WINAPI DllEntryPoint(HINSTANCE , unsigned long , void*) +{ + return 1; +} +#endif + diff --git a/cutl/details/boost/regex/src/regex_debug.cxx b/cutl/details/boost/regex/src/regex_debug.cxx new file mode 100644 index 0000000..4b70ec5 --- /dev/null +++ b/cutl/details/boost/regex/src/regex_debug.cxx @@ -0,0 +1,59 @@ +/* + * + * Copyright (c) 1998-2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: regex_debug.cpp + * VERSION: see + * DESCRIPTION: Misc. debugging helpers. + */ + + +#define BOOST_REGEX_SOURCE + +#include + + +// +// regex configuration information: this prints out the settings used +// when the library was built - include in debugging builds only: +// +#ifdef BOOST_REGEX_CONFIG_INFO + +#define print_macro regex_lib_print_macro +#define print_expression regex_lib_print_expression +#define print_byte_order regex_lib_print_byte_order +#define print_sign regex_lib_print_sign +#define print_compiler_macros regex_lib_print_compiler_macros +#define print_stdlib_macros regex_lib_print_stdlib_macros +#define print_platform_macros regex_lib_print_platform_macros +#define print_boost_macros regex_lib_print_boost_macros +#define print_separator regex_lib_print_separator +#define OLD_MAIN regex_lib_main +#define NEW_MAIN regex_lib_main2 +#define NO_RECURSE + +#include + +BOOST_REGEX_DECL void BOOST_REGEX_CALL print_regex_library_info() +{ + std::cout << "\n\n"; + print_separator(); + std::cout << "Regex library build configuration:\n\n"; + regex_lib_main2(); +} + +#endif + + + + + diff --git a/cutl/details/boost/regex/src/regex_raw_buffer.cxx b/cutl/details/boost/regex/src/regex_raw_buffer.cxx new file mode 100644 index 0000000..a0c4811 --- /dev/null +++ b/cutl/details/boost/regex/src/regex_raw_buffer.cxx @@ -0,0 +1,70 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_raw_buffer.cpp + * VERSION see + * DESCRIPTION: Member functions for class raw_storage. + */ + + +#define BOOST_REGEX_SOURCE +#include +#include +#include +#include + +#if defined(BOOST_NO_STDC_NAMESPACE) +namespace std{ + using ::memcpy; + using ::memmove; +} +#endif + + +namespace cutl_details_boost{ namespace re_detail{ + +void BOOST_REGEX_CALL raw_storage::resize(size_type n) +{ + register size_type newsize = start ? last - start : 1024; + while(newsize < n) + newsize *= 2; + register size_type datasize = end - start; + // extend newsize to WORD/DWORD boundary: + newsize = (newsize + padding_mask) & ~(padding_mask); + + // allocate and copy data: + register pointer ptr = static_cast(::operator new(newsize)); + BOOST_REGEX_NOEH_ASSERT(ptr) + std::memcpy(ptr, start, datasize); + + // get rid of old buffer: + ::operator delete(start); + + // and set up pointers: + start = ptr; + end = ptr + datasize; + last = ptr + newsize; +} + +void* BOOST_REGEX_CALL raw_storage::insert(size_type pos, size_type n) +{ + BOOST_ASSERT(pos <= size_type(end - start)); + if(size_type(last - end) < n) + resize(n + (end - start)); + register void* result = start + pos; + std::memmove(start + pos + n, start + pos, (end - start) - pos); + end += n; + return result; +} + +}} // namespaces diff --git a/cutl/details/boost/regex/src/regex_traits_defaults.cxx b/cutl/details/boost/regex/src/regex_traits_defaults.cxx new file mode 100644 index 0000000..fe34bb2 --- /dev/null +++ b/cutl/details/boost/regex/src/regex_traits_defaults.cxx @@ -0,0 +1,692 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_traits_defaults.cpp + * VERSION see + * DESCRIPTION: Declares API's for access to regex_traits default properties. + */ + +#define BOOST_REGEX_SOURCE +#include + +#include +#ifndef BOOST_NO_WREGEX +#include +#endif + +#if defined(BOOST_NO_STDC_NAMESPACE) +namespace std{ + using ::tolower; + using ::toupper; +#ifndef BOOST_NO_WREGEX + using ::towlower; + using ::towupper; +#endif +} +#endif + + +namespace cutl_details_boost{ namespace re_detail{ + +BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n) +{ + // if the user hasn't supplied a message catalog, then this supplies + // default "messages" for us to load in the range 1-100. + const char* messages[] = { + "", + "(", + ")", + "$", + "^", + ".", + "*", + "+", + "?", + "[", + "]", + "|", + "\\", + "#", + "-", + "{", + "}", + "0123456789", + "b", + "B", + "<", + ">", + "", + "", + "A`", + "z'", + "\n", + ",", + "a", + "f", + "n", + "r", + "t", + "v", + "x", + "c", + ":", + "=", + "e", + "", + "", + "", + "", + "", + "", + "", + "", + "E", + "Q", + "X", + "C", + "Z", + "G", + "!", + "p", + "P", + "N", + "gk", + "K", + "R", + }; + + return ((n >= (sizeof(messages) / sizeof(messages[1]))) ? "" : messages[n]); +} + +BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n) +{ + static const char* const s_default_error_messages[] = { + "Success", /* REG_NOERROR 0 error_ok */ + "No match", /* REG_NOMATCH 1 error_no_match */ + "Invalid regular expression.", /* REG_BADPAT 2 error_bad_pattern */ + "Invalid collation character.", /* REG_ECOLLATE 3 error_collate */ + "Invalid character class name, collating name, or character range.", /* REG_ECTYPE 4 error_ctype */ + "Invalid or unterminated escape sequence.", /* REG_EESCAPE 5 error_escape */ + "Invalid back reference: specified capturing group does not exist.", /* REG_ESUBREG 6 error_backref */ + "Unmatched [ or [^ in character class declaration.", /* REG_EBRACK 7 error_brack */ + "Unmatched marking parenthesis ( or \\(.", /* REG_EPAREN 8 error_paren */ + "Unmatched quantified repeat operator { or \\{.", /* REG_EBRACE 9 error_brace */ + "Invalid content of repeat range.", /* REG_BADBR 10 error_badbrace */ + "Invalid range end in character class", /* REG_ERANGE 11 error_range */ + "Out of memory.", /* REG_ESPACE 12 error_space NOT USED */ + "Invalid preceding regular expression prior to repetition operator.", /* REG_BADRPT 13 error_badrepeat */ + "Premature end of regular expression", /* REG_EEND 14 error_end NOT USED */ + "Regular expression is too large.", /* REG_ESIZE 15 error_size NOT USED */ + "Unmatched ) or \\)", /* REG_ERPAREN 16 error_right_paren NOT USED */ + "Empty regular expression.", /* REG_EMPTY 17 error_empty */ + "The complexity of matching the regular expression exceeded predefined bounds. " + "Try refactoring the regular expression to make each choice made by the state machine unambiguous. " + "This exception is thrown to prevent \"eternal\" matches that take an " + "indefinite period time to locate.", /* REG_ECOMPLEXITY 18 error_complexity */ + "Ran out of stack space trying to match the regular expression.", /* REG_ESTACK 19 error_stack */ + "Invalid or unterminated Perl (?...) sequence.", /* REG_E_PERL 20 error_perl */ + "Unknown error.", /* REG_E_UNKNOWN 21 error_unknown */ + }; + + return (n > ::cutl_details_boost::regex_constants::error_unknown) ? s_default_error_messages[ ::cutl_details_boost::regex_constants::error_unknown] : s_default_error_messages[n]; +} + +BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(cutl_details_boost::uint_least16_t c) +{ + const cutl_details_boost::uint_least16_t combining_ranges[] = { 0x0300, 0x0361, + 0x0483, 0x0486, + 0x0903, 0x0903, + 0x093E, 0x0940, + 0x0949, 0x094C, + 0x0982, 0x0983, + 0x09BE, 0x09C0, + 0x09C7, 0x09CC, + 0x09D7, 0x09D7, + 0x0A3E, 0x0A40, + 0x0A83, 0x0A83, + 0x0ABE, 0x0AC0, + 0x0AC9, 0x0ACC, + 0x0B02, 0x0B03, + 0x0B3E, 0x0B3E, + 0x0B40, 0x0B40, + 0x0B47, 0x0B4C, + 0x0B57, 0x0B57, + 0x0B83, 0x0B83, + 0x0BBE, 0x0BBF, + 0x0BC1, 0x0BCC, + 0x0BD7, 0x0BD7, + 0x0C01, 0x0C03, + 0x0C41, 0x0C44, + 0x0C82, 0x0C83, + 0x0CBE, 0x0CBE, + 0x0CC0, 0x0CC4, + 0x0CC7, 0x0CCB, + 0x0CD5, 0x0CD6, + 0x0D02, 0x0D03, + 0x0D3E, 0x0D40, + 0x0D46, 0x0D4C, + 0x0D57, 0x0D57, + 0x0F7F, 0x0F7F, + 0x20D0, 0x20E1, + 0x3099, 0x309A, + 0xFE20, 0xFE23, + 0xffff, 0xffff, }; + + const cutl_details_boost::uint_least16_t* p = combining_ranges + 1; + while(*p < c) p += 2; + --p; + if((c >= *p) && (c <= *(p+1))) + return true; + return false; +} + +// +// these are the POSIX collating names: +// +BOOST_REGEX_DECL const char* def_coll_names[] = { +"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline", +"vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", +"SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark", +"quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe", +"left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen", +"period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", +"colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign", +"question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", +"Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash", +"right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f", +"g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket", +"vertical-line", "right-curly-bracket", "tilde", "DEL", "", +}; + +// these multi-character collating elements +// should keep most Western-European locales +// happy - we should really localise these a +// little more - but this will have to do for +// now: + +BOOST_REGEX_DECL const char* def_multi_coll[] = { + "ae", + "Ae", + "AE", + "ch", + "Ch", + "CH", + "ll", + "Ll", + "LL", + "ss", + "Ss", + "SS", + "nj", + "Nj", + "NJ", + "dz", + "Dz", + "DZ", + "lj", + "Lj", + "LJ", + "", +}; + + + +BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name) +{ + unsigned int i = 0; + while(*def_coll_names[i]) + { + if(def_coll_names[i] == name) + { + return std::string(1, char(i)); + } + ++i; + } + i = 0; + while(*def_multi_coll[i]) + { + if(def_multi_coll[i] == name) + { + return def_multi_coll[i]; + } + ++i; + } + return std::string(); +} + +BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c) +{ + return static_cast((std::tolower)((unsigned char)c)); +} + +BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c) +{ + return static_cast((std::toupper)((unsigned char)c)); +} +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c) +{ + return (std::towlower)(c); +} + +BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c) +{ + return (std::towupper)(c); +} +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c) +{ + return (std::towlower)(c); +} + +BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c) +{ + return (std::towupper)(c); +} +#endif + +#endif + +BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::escape_syntax_type char_syntax[] = { + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /* */ // 32 + regex_constants::escape_type_identity, /*!*/ + regex_constants::escape_type_identity, /*"*/ + regex_constants::escape_type_identity, /*#*/ + regex_constants::escape_type_identity, /*$*/ + regex_constants::escape_type_identity, /*%*/ + regex_constants::escape_type_identity, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::escape_type_identity, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::escape_type_identity, /*,*/ + regex_constants::escape_type_identity, /*-*/ + regex_constants::escape_type_identity, /*.*/ + regex_constants::escape_type_identity, /*/*/ + regex_constants::escape_type_decimal, /*0*/ + regex_constants::escape_type_backref, /*1*/ + regex_constants::escape_type_backref, /*2*/ + regex_constants::escape_type_backref, /*3*/ + regex_constants::escape_type_backref, /*4*/ + regex_constants::escape_type_backref, /*5*/ + regex_constants::escape_type_backref, /*6*/ + regex_constants::escape_type_backref, /*7*/ + regex_constants::escape_type_backref, /*8*/ + regex_constants::escape_type_backref, /*9*/ + regex_constants::escape_type_identity, /*:*/ + regex_constants::escape_type_identity, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::escape_type_identity, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::escape_type_identity, /*@*/ + regex_constants::escape_type_start_buffer, /*A*/ + regex_constants::escape_type_not_word_assert, /*B*/ + regex_constants::escape_type_C, /*C*/ + regex_constants::escape_type_not_class, /*D*/ + regex_constants::escape_type_E, /*E*/ + regex_constants::escape_type_not_class, /*F*/ + regex_constants::escape_type_G, /*G*/ + regex_constants::escape_type_not_class, /*H*/ + regex_constants::escape_type_not_class, /*I*/ + regex_constants::escape_type_not_class, /*J*/ + regex_constants::escape_type_reset_start_mark, /*K*/ + regex_constants::escape_type_not_class, /*L*/ + regex_constants::escape_type_not_class, /*M*/ + regex_constants::escape_type_named_char, /*N*/ + regex_constants::escape_type_not_class, /*O*/ + regex_constants::escape_type_not_property, /*P*/ + regex_constants::escape_type_Q, /*Q*/ + regex_constants::escape_type_line_ending, /*R*/ + regex_constants::escape_type_not_class, /*S*/ + regex_constants::escape_type_not_class, /*T*/ + regex_constants::escape_type_not_class, /*U*/ + regex_constants::escape_type_not_class, /*V*/ + regex_constants::escape_type_not_class, /*W*/ + regex_constants::escape_type_X, /*X*/ + regex_constants::escape_type_not_class, /*Y*/ + regex_constants::escape_type_Z, /*Z*/ + regex_constants::escape_type_identity, /*[*/ + regex_constants::escape_type_identity, /*\*/ + regex_constants::escape_type_identity, /*]*/ + regex_constants::escape_type_identity, /*^*/ + regex_constants::escape_type_identity, /*_*/ + regex_constants::escape_type_start_buffer, /*`*/ + regex_constants::escape_type_control_a, /*a*/ + regex_constants::escape_type_word_assert, /*b*/ + regex_constants::escape_type_ascii_control, /*c*/ + regex_constants::escape_type_class, /*d*/ + regex_constants::escape_type_e, /*e*/ + regex_constants::escape_type_control_f, /*f*/ + regex_constants::escape_type_extended_backref, /*g*/ + regex_constants::escape_type_class, /*h*/ + regex_constants::escape_type_class, /*i*/ + regex_constants::escape_type_class, /*j*/ + regex_constants::escape_type_extended_backref, /*k*/ + regex_constants::escape_type_class, /*l*/ + regex_constants::escape_type_class, /*m*/ + regex_constants::escape_type_control_n, /*n*/ + regex_constants::escape_type_class, /*o*/ + regex_constants::escape_type_property, /*p*/ + regex_constants::escape_type_class, /*q*/ + regex_constants::escape_type_control_r, /*r*/ + regex_constants::escape_type_class, /*s*/ + regex_constants::escape_type_control_t, /*t*/ + regex_constants::escape_type_class, /*u*/ + regex_constants::escape_type_control_v, /*v*/ + regex_constants::escape_type_class, /*w*/ + regex_constants::escape_type_hex, /*x*/ + regex_constants::escape_type_class, /*y*/ + regex_constants::escape_type_end_buffer, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::escape_type_identity, /*~*/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + regex_constants::escape_type_identity, /**/ + }; + + return char_syntax[(unsigned char)c]; +} + +BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c) +{ + // + // char_syntax determines how the compiler treats a given character + // in a regular expression. + // + static regex_constants::syntax_type char_syntax[] = { + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_newline, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /* */ // 32 + regex_constants::syntax_not, /*!*/ + regex_constants::syntax_char, /*"*/ + regex_constants::syntax_hash, /*#*/ + regex_constants::syntax_dollar, /*$*/ + regex_constants::syntax_char, /*%*/ + regex_constants::syntax_char, /*&*/ + regex_constants::escape_type_end_buffer, /*'*/ + regex_constants::syntax_open_mark, /*(*/ + regex_constants::syntax_close_mark, /*)*/ + regex_constants::syntax_star, /***/ + regex_constants::syntax_plus, /*+*/ + regex_constants::syntax_comma, /*,*/ + regex_constants::syntax_dash, /*-*/ + regex_constants::syntax_dot, /*.*/ + regex_constants::syntax_char, /*/*/ + regex_constants::syntax_digit, /*0*/ + regex_constants::syntax_digit, /*1*/ + regex_constants::syntax_digit, /*2*/ + regex_constants::syntax_digit, /*3*/ + regex_constants::syntax_digit, /*4*/ + regex_constants::syntax_digit, /*5*/ + regex_constants::syntax_digit, /*6*/ + regex_constants::syntax_digit, /*7*/ + regex_constants::syntax_digit, /*8*/ + regex_constants::syntax_digit, /*9*/ + regex_constants::syntax_colon, /*:*/ + regex_constants::syntax_char, /*;*/ + regex_constants::escape_type_left_word, /*<*/ + regex_constants::syntax_equal, /*=*/ + regex_constants::escape_type_right_word, /*>*/ + regex_constants::syntax_question, /*?*/ + regex_constants::syntax_char, /*@*/ + regex_constants::syntax_char, /*A*/ + regex_constants::syntax_char, /*B*/ + regex_constants::syntax_char, /*C*/ + regex_constants::syntax_char, /*D*/ + regex_constants::syntax_char, /*E*/ + regex_constants::syntax_char, /*F*/ + regex_constants::syntax_char, /*G*/ + regex_constants::syntax_char, /*H*/ + regex_constants::syntax_char, /*I*/ + regex_constants::syntax_char, /*J*/ + regex_constants::syntax_char, /*K*/ + regex_constants::syntax_char, /*L*/ + regex_constants::syntax_char, /*M*/ + regex_constants::syntax_char, /*N*/ + regex_constants::syntax_char, /*O*/ + regex_constants::syntax_char, /*P*/ + regex_constants::syntax_char, /*Q*/ + regex_constants::syntax_char, /*R*/ + regex_constants::syntax_char, /*S*/ + regex_constants::syntax_char, /*T*/ + regex_constants::syntax_char, /*U*/ + regex_constants::syntax_char, /*V*/ + regex_constants::syntax_char, /*W*/ + regex_constants::syntax_char, /*X*/ + regex_constants::syntax_char, /*Y*/ + regex_constants::syntax_char, /*Z*/ + regex_constants::syntax_open_set, /*[*/ + regex_constants::syntax_escape, /*\*/ + regex_constants::syntax_close_set, /*]*/ + regex_constants::syntax_caret, /*^*/ + regex_constants::syntax_char, /*_*/ + regex_constants::syntax_char, /*`*/ + regex_constants::syntax_char, /*a*/ + regex_constants::syntax_char, /*b*/ + regex_constants::syntax_char, /*c*/ + regex_constants::syntax_char, /*d*/ + regex_constants::syntax_char, /*e*/ + regex_constants::syntax_char, /*f*/ + regex_constants::syntax_char, /*g*/ + regex_constants::syntax_char, /*h*/ + regex_constants::syntax_char, /*i*/ + regex_constants::syntax_char, /*j*/ + regex_constants::syntax_char, /*k*/ + regex_constants::syntax_char, /*l*/ + regex_constants::syntax_char, /*m*/ + regex_constants::syntax_char, /*n*/ + regex_constants::syntax_char, /*o*/ + regex_constants::syntax_char, /*p*/ + regex_constants::syntax_char, /*q*/ + regex_constants::syntax_char, /*r*/ + regex_constants::syntax_char, /*s*/ + regex_constants::syntax_char, /*t*/ + regex_constants::syntax_char, /*u*/ + regex_constants::syntax_char, /*v*/ + regex_constants::syntax_char, /*w*/ + regex_constants::syntax_char, /*x*/ + regex_constants::syntax_char, /*y*/ + regex_constants::syntax_char, /*z*/ + regex_constants::syntax_open_brace, /*{*/ + regex_constants::syntax_or, /*|*/ + regex_constants::syntax_close_brace, /*}*/ + regex_constants::syntax_char, /*~*/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + regex_constants::syntax_char, /**/ + }; + + return char_syntax[(unsigned char)c]; +} + + +} // re_detail +} // boost diff --git a/cutl/details/boost/regex/src/static_mutex.cxx b/cutl/details/boost/regex/src/static_mutex.cxx new file mode 100644 index 0000000..4d51551 --- /dev/null +++ b/cutl/details/boost/regex/src/static_mutex.cxx @@ -0,0 +1,179 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE static_mutex.cpp + * VERSION see + * DESCRIPTION: Declares static_mutex lock type. + */ + +#define BOOST_REGEX_SOURCE +#include + +#ifdef BOOST_HAS_THREADS + +#include + +#if defined(BOOST_HAS_WINTHREADS) +#ifndef NOMINMAX +# define NOMINMAX +#endif +#define WIN32_LEAN_AND_MEAN +#include +#include +#endif + + +namespace cutl_details_boost{ + +#if defined(BOOST_HAS_PTHREADS) && defined(PTHREAD_MUTEX_INITIALIZER) + +scoped_static_mutex_lock::scoped_static_mutex_lock(static_mutex& m, bool lk) +: m_mutex(m), m_have_lock(false) +{ + if(lk) + lock(); +} + +scoped_static_mutex_lock::~scoped_static_mutex_lock() +{ + if(m_have_lock) + unlock(); +} + +void scoped_static_mutex_lock::lock() +{ + if(0 == m_have_lock) + { + pthread_mutex_lock(&(m_mutex.m_mutex)); + m_have_lock = true; + } +} + +void scoped_static_mutex_lock::unlock() +{ + if(m_have_lock) + { + pthread_mutex_unlock(&(m_mutex.m_mutex)); + m_have_lock = false; + } +} + +#elif defined(BOOST_HAS_WINTHREADS) + +BOOST_STATIC_ASSERT(sizeof(LONG) == sizeof(cutl_details_boost::int32_t)); + +scoped_static_mutex_lock::scoped_static_mutex_lock(static_mutex& m, bool lk) +: m_mutex(m), m_have_lock(false) +{ + if(lk) + lock(); +} + +scoped_static_mutex_lock::~scoped_static_mutex_lock() +{ + if(m_have_lock) + unlock(); +} + +void scoped_static_mutex_lock::lock() +{ + if(0 == m_have_lock) + { +#if !defined(InterlockedCompareExchangePointer) + while(0 != InterlockedCompareExchange(reinterpret_cast((cutl_details_boost::uint_least16_t*)&(m_mutex.m_mutex)), (void*)1, 0)) +#else + while(0 != InterlockedCompareExchange(reinterpret_cast(&(m_mutex.m_mutex)), 1, 0)) +#endif + { + Sleep(0); + } + m_have_lock = true; + } +} + +void scoped_static_mutex_lock::unlock() +{ + if(m_have_lock) + { +#if !defined(InterlockedCompareExchangePointer) + InterlockedExchange((LONG*)&(m_mutex.m_mutex), 0); +#else + InterlockedExchange(reinterpret_cast(&(m_mutex.m_mutex)), 0); +#endif + m_have_lock = false; + } +} + +#else +// +// Portable version of a static mutex based on Boost.Thread library: +// +#include +#include + +cutl_details_boost::recursive_mutex* static_mutex::m_pmutex = 0; +cutl_details_boost::once_flag static_mutex::m_once = BOOST_ONCE_INIT; + +extern "C" BOOST_REGEX_DECL void cutl_details_boost_regex_free_static_mutex() +{ + delete static_mutex::m_pmutex; + static_mutex::m_pmutex = 0; +} + +void static_mutex::init() +{ + m_pmutex = new cutl_details_boost::recursive_mutex(); + int r = atexit(cutl_details_boost_regex_free_static_mutex); + BOOST_ASSERT(0 == r); +} + +scoped_static_mutex_lock::scoped_static_mutex_lock(static_mutex& , bool lk) +: m_plock(0), m_have_lock(false) +{ + if(lk) + lock(); +} + +scoped_static_mutex_lock::~scoped_static_mutex_lock() +{ + if(m_have_lock) + unlock(); + delete m_plock; +} + +void scoped_static_mutex_lock::lock() +{ + if(0 == m_have_lock) + { + cutl_details_boost::call_once(static_mutex::m_once,&static_mutex::init); + if(0 == m_plock) + m_plock = new cutl_details_boost::recursive_mutex::scoped_lock(*static_mutex::m_pmutex, cutl_details_boost::defer_lock); + m_plock->lock(); + m_have_lock = true; + } +} + +void scoped_static_mutex_lock::unlock() +{ + if(m_have_lock) + { + m_plock->unlock(); + m_have_lock = false; + } +} + +#endif + +} + +#endif // BOOST_HAS_THREADS diff --git a/cutl/details/boost/regex/src/usinstances.cxx b/cutl/details/boost/regex/src/usinstances.cxx new file mode 100644 index 0000000..ad4c2b8 --- /dev/null +++ b/cutl/details/boost/regex/src/usinstances.cxx @@ -0,0 +1,81 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: winstances.cpp + * VERSION: see + * DESCRIPTION: regex unsigned short template instances (MSVC only). + */ + +#define BOOST_REGEX_SOURCE +#ifdef _MSC_VER +#pragma warning(disable:4506) // 'no definition for inline function' +#endif + +#include +#include +#include + +#if defined(_DLL_CPPLIB) && !defined(_M_CEE_PURE) && defined(_NATIVE_WCHAR_T_DEFINED) \ + && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION) || defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER))\ + && BOOST_WORKAROUND(BOOST_MSVC, <1600) +// +// This is a horrible workaround, but without declaring these symbols extern we get +// duplicate symbol errors when linking if the application is built without +// /Zc:wchar_t +// +#ifdef _CRTIMP2_PURE +# define BOOST_REGEX_STDLIB_DECL _CRTIMP2_PURE +#else +# define BOOST_REGEX_STDLIB_DECL _CRTIMP2 +#endif + +namespace std{ + +#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) +template class BOOST_REGEX_STDLIB_DECL allocator; +template class BOOST_REGEX_STDLIB_DECL _String_val >; +template class BOOST_REGEX_STDLIB_DECL basic_string, allocator >; +#endif + +#if BOOST_WORKAROUND(BOOST_MSVC, > 1300) && BOOST_WORKAROUND(BOOST_MSVC, BOOST_TESTED_AT(1400)) +template<> BOOST_REGEX_STDLIB_DECL std::size_t __cdecl char_traits::length(unsigned short const*); +#endif + +template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( + const basic_string, allocator >&, + const basic_string, allocator >&); +template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( + const unsigned short *, + const basic_string, allocator >&); +template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( + const basic_string, allocator >&, + const unsigned short *); +template BOOST_REGEX_STDLIB_DECL bool __cdecl operator<( + const basic_string, allocator >&, + const basic_string, allocator >&); +template BOOST_REGEX_STDLIB_DECL bool __cdecl operator>( + const basic_string, allocator >&, + const basic_string, allocator >&); +} +#endif + +#include + +#if !defined(BOOST_NO_WREGEX) && defined(BOOST_REGEX_HAS_OTHER_WCHAR_T) && !defined(BOOST_REGEX_NO_EXTERNAL_TEMPLATES) +#define BOOST_REGEX_US_INSTANTIATE + +#include + +#endif + + diff --git a/cutl/details/boost/regex/src/w32_regex_traits.cxx b/cutl/details/boost/regex/src/w32_regex_traits.cxx new file mode 100644 index 0000000..d17db73 --- /dev/null +++ b/cutl/details/boost/regex/src/w32_regex_traits.cxx @@ -0,0 +1,650 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE w32_regex_traits.cpp + * VERSION see + * DESCRIPTION: Implements w32_regex_traits (and associated helper classes). + */ + +#define BOOST_REGEX_SOURCE +#include + +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +#include +#include + +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +# define NOMINMAX +#endif +#define NOGDI +#include + +#if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE) +#pragma comment(lib, "user32.lib") +#endif + +#ifdef BOOST_NO_STDC_NAMESPACE +namespace std{ + using ::memset; +} +#endif + +namespace cutl_details_boost{ namespace re_detail{ + +#ifdef BOOST_NO_ANSI_APIS +UINT get_code_page_for_locale_id(lcid_type idx) +{ + WCHAR code_page_string[7]; + if (::GetLocaleInfoW(idx, LOCALE_IDEFAULTANSICODEPAGE, code_page_string, 7) == 0) + return 0; + + return static_cast(_wtol(code_page_string)); +} +#endif + + +void w32_regex_traits_char_layer::init() +{ + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + std::memset(m_char_map, 0, sizeof(m_char_map)); + cat_type cat; + std::string cat_name(w32_regex_traits::get_catalog_name()); + if(cat_name.size()) + { + cat = ::cutl_details_boost::re_detail::w32_cat_open(cat_name); + if(!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + ::cutl_details_boost::re_detail::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if(cat) + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = ::cutl_details_boost::re_detail::w32_cat_get(cat, this->m_locale, i, get_default_syntax(i)); + for(string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[static_cast(mss[j])] = i; + } + } + } + else + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while(ptr && *ptr) + { + m_char_map[static_cast(*ptr)] = i; + ++ptr; + } + } + } + // + // finish off by calculating our escape types: + // + unsigned char i = 'A'; + do + { + if(m_char_map[i] == 0) + { + if(::cutl_details_boost::re_detail::w32_is(this->m_locale, 0x0002u, (char)i)) + m_char_map[i] = regex_constants::escape_type_class; + else if(::cutl_details_boost::re_detail::w32_is(this->m_locale, 0x0001u, (char)i)) + m_char_map[i] = regex_constants::escape_type_not_class; + } + }while(0xFF != i++); + + // + // fill in lower case map: + // + char char_map[1 << CHAR_BIT]; + for(int ii = 0; ii < (1 << CHAR_BIT); ++ii) + char_map[ii] = static_cast(ii); +#ifndef BOOST_NO_ANSI_APIS + int r = ::LCMapStringA(this->m_locale, LCMAP_LOWERCASE, char_map, 1 << CHAR_BIT, this->m_lower_map, 1 << CHAR_BIT); + BOOST_ASSERT(r != 0); +#else + UINT code_page = get_code_page_for_locale_id(this->m_locale); + BOOST_ASSERT(code_page != 0); + + WCHAR wide_char_map[1 << CHAR_BIT]; + int conv_r = ::MultiByteToWideChar(code_page, 0, char_map, 1 << CHAR_BIT, wide_char_map, 1 << CHAR_BIT); + BOOST_ASSERT(conv_r != 0); + + WCHAR wide_lower_map[1 << CHAR_BIT]; + int r = ::LCMapStringW(this->m_locale, LCMAP_LOWERCASE, wide_char_map, 1 << CHAR_BIT, wide_lower_map, 1 << CHAR_BIT); + BOOST_ASSERT(r != 0); + + conv_r = ::WideCharToMultiByte(code_page, 0, wide_lower_map, r, this->m_lower_map, 1 << CHAR_BIT, NULL, NULL); + BOOST_ASSERT(conv_r != 0); +#endif + if(r < (1 << CHAR_BIT)) + { + // if we have multibyte characters then not all may have been given + // a lower case mapping: + for(int jj = r; jj < (1 << CHAR_BIT); ++jj) + this->m_lower_map[jj] = static_cast(jj); + } + +#ifndef BOOST_NO_ANSI_APIS + r = ::GetStringTypeExA(this->m_locale, CT_CTYPE1, char_map, 1 << CHAR_BIT, this->m_type_map); +#else + r = ::GetStringTypeExW(this->m_locale, CT_CTYPE1, wide_char_map, 1 << CHAR_BIT, this->m_type_map); +#endif + BOOST_ASSERT(0 != r); +} + +BOOST_REGEX_DECL lcid_type BOOST_REGEX_CALL w32_get_default_locale() +{ + return ::GetUserDefaultLCID(); +} + +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(char c, lcid_type idx) +{ +#ifndef BOOST_NO_ANSI_APIS + WORD mask; + if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) + return true; + return false; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + WORD mask; + if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_LOWER)) + return true; + return false; +#endif +} + +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(wchar_t c, lcid_type idx) +{ + WORD mask; + if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) + return true; + return false; +} +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(unsigned short ca, lcid_type idx) +{ + WORD mask; + wchar_t c = ca; + if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER)) + return true; + return false; +} +#endif + +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(char c, lcid_type idx) +{ +#ifndef BOOST_NO_ANSI_APIS + WORD mask; + if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) + return true; + return false; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return false; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + WORD mask; + if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_UPPER)) + return true; + return false; +#endif +} + +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(wchar_t c, lcid_type idx) +{ + WORD mask; + if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) + return true; + return false; +} +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(unsigned short ca, lcid_type idx) +{ + WORD mask; + wchar_t c = ca; + if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER)) + return true; + return false; +} +#endif + +void free_module(void* mod) +{ + ::FreeLibrary(static_cast(mod)); +} + +BOOST_REGEX_DECL cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name) +{ +#ifndef BOOST_NO_ANSI_APIS + cat_type result(::LoadLibraryA(name.c_str()), &free_module); + return result; +#else + LPWSTR wide_name = (LPWSTR)_alloca( (name.size() + 1) * sizeof(WCHAR) ); + if (::MultiByteToWideChar(CP_ACP, 0, name.c_str(), name.size(), wide_name, name.size() + 1) == 0) + return cat_type(); + + cat_type result(::LoadLibraryW(wide_name), &free_module); + return result; +#endif +} + +BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::string& def) +{ +#ifndef BOOST_NO_ANSI_APIS + char buf[256]; + if(0 == ::LoadStringA( + static_cast(cat.get()), + i, + buf, + 256 + )) + { + return def; + } +#else + WCHAR wbuf[256]; + int r = ::LoadStringW( + static_cast(cat.get()), + i, + wbuf, + 256 + ); + if (r == 0) + return def; + + LPSTR buf = (LPSTR)_alloca( (r + 1) * 2 ); + if (::WideCharToMultiByte(CP_ACP, 0, wbuf, r, buf, (r + 1) * 2, NULL, NULL) == 0) + return def; +#endif + return std::string(buf); +} + +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::wstring& def) +{ + wchar_t buf[256]; + if(0 == ::LoadStringW( + static_cast(cat.get()), + i, + buf, + 256 + )) + { + return def; + } + return std::wstring(buf); +} +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL std::basic_string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::basic_string& def) +{ + unsigned short buf[256]; + if(0 == ::LoadStringW( + static_cast(cat.get()), + i, + (LPWSTR)buf, + 256 + )) + { + return def; + } + return std::basic_string(buf); +} +#endif +#endif +BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_transform(lcid_type idx, const char* p1, const char* p2) +{ +#ifndef BOOST_NO_ANSI_APIS + int bytes = ::LCMapStringA( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if(!bytes) + return std::string(p1, p2); + std::string result(++bytes, '\0'); + bytes = ::LCMapStringA( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + &*result.begin(), // destination buffer + bytes // size of destination buffer + ); +#else + UINT code_page = get_code_page_for_locale_id(idx); + if(code_page == 0) + return std::string(p1, p2); + + int src_len = static_cast(p2 - p1); + LPWSTR wide_p1 = (LPWSTR)_alloca( (src_len + 1) * 2 ); + if(::MultiByteToWideChar(code_page, 0, p1, src_len, wide_p1, src_len + 1) == 0) + return std::string(p1, p2); + + int bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + wide_p1, // source string + src_len, // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if(!bytes) + return std::string(p1, p2); + std::string result(++bytes, '\0'); + bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + wide_p1, // source string + src_len, // number of characters in source string + (LPWSTR)&*result.begin(), // destination buffer + bytes // size of destination buffer + ); +#endif + if(bytes > static_cast(result.size())) + return std::string(p1, p2); + while(result.size() && result[result.size()-1] == '\0') + { + result.erase(result.size()-1); + } + return result; +} + +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_transform(lcid_type idx, const wchar_t* p1, const wchar_t* p2) +{ + int bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if(!bytes) + return std::wstring(p1, p2); + std::string result(++bytes, '\0'); + bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + p1, // source string + static_cast(p2 - p1), // number of characters in source string + reinterpret_cast(&*result.begin()), // destination buffer *of bytes* + bytes // size of destination buffer + ); + if(bytes > static_cast(result.size())) + return std::wstring(p1, p2); + while(result.size() && result[result.size()-1] == L'\0') + { + result.erase(result.size()-1); + } + std::wstring r2; + for(std::string::size_type i = 0; i < result.size(); ++i) + r2.append(1, static_cast(static_cast(result[i]))); + return r2; +} +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL std::basic_string BOOST_REGEX_CALL w32_transform(lcid_type idx, const unsigned short* p1, const unsigned short* p2) +{ + int bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + (LPCWSTR)p1, // source string + static_cast(p2 - p1), // number of characters in source string + 0, // destination buffer + 0 // size of destination buffer + ); + if(!bytes) + return std::basic_string(p1, p2); + std::string result(++bytes, '\0'); + bytes = ::LCMapStringW( + idx, // locale identifier + LCMAP_SORTKEY, // mapping transformation type + (LPCWSTR)p1, // source string + static_cast(p2 - p1), // number of characters in source string + reinterpret_cast(&*result.begin()), // destination buffer *of bytes* + bytes // size of destination buffer + ); + if(bytes > static_cast(result.size())) + return std::basic_string(p1, p2); + while(result.size() && result[result.size()-1] == L'\0') + { + result.erase(result.size()-1); + } + std::basic_string r2; + for(std::string::size_type i = 0; i < result.size(); ++i) + r2.append(1, static_cast(static_cast(result[i]))); + return r2; +} +#endif +#endif +BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_tolower(char c, lcid_type idx) +{ + char result[2]; +#ifndef BOOST_NO_ANSI_APIS + int b = ::LCMapStringA( + idx, // locale identifier + LCMAP_LOWERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if(b == 0) + return c; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if (code_page == 0) + return c; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return c; + + WCHAR wide_result; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_LOWERCASE, // mapping transformation type + &wide_c, // source string + 1, // number of characters in source string + &wide_result, // destination buffer + 1); // size of destination buffer + if(b == 0) + return c; + + if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) + return c; +#endif + return result[0]; +} + +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_tolower(wchar_t c, lcid_type idx) +{ + wchar_t result[2]; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_LOWERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if(b == 0) + return c; + return result[0]; +} +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_tolower(unsigned short c, lcid_type idx) +{ + wchar_t result[2]; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_LOWERCASE, // mapping transformation type + (wchar_t const*)&c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if(b == 0) + return c; + return result[0]; +} +#endif +#endif +BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_toupper(char c, lcid_type idx) +{ + char result[2]; +#ifndef BOOST_NO_ANSI_APIS + int b = ::LCMapStringA( + idx, // locale identifier + LCMAP_UPPERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if(b == 0) + return c; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if(code_page == 0) + return c; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return c; + + WCHAR wide_result; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_UPPERCASE, // mapping transformation type + &wide_c, // source string + 1, // number of characters in source string + &wide_result, // destination buffer + 1); // size of destination buffer + if(b == 0) + return c; + + if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0) + return c; +#endif + return result[0]; +} + +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type idx) +{ + wchar_t result[2]; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_UPPERCASE, // mapping transformation type + &c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if(b == 0) + return c; + return result[0]; +} +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_toupper(unsigned short c, lcid_type idx) +{ + wchar_t result[2]; + int b = ::LCMapStringW( + idx, // locale identifier + LCMAP_UPPERCASE, // mapping transformation type + (wchar_t const*)&c, // source string + 1, // number of characters in source string + result, // destination buffer + 1); // size of destination buffer + if(b == 0) + return c; + return result[0]; +} +#endif +#endif +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, cutl_details_boost::uint32_t m, char c) +{ + WORD mask; +#ifndef BOOST_NO_ANSI_APIS + if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; +#else + UINT code_page = get_code_page_for_locale_id(idx); + if(code_page == 0) + return false; + + WCHAR wide_c; + if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0) + return false; + + if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; +#endif + if((m & w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + return false; +} + +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, cutl_details_boost::uint32_t m, wchar_t c) +{ + WORD mask; + if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; + if((m & w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + if((m & w32_regex_traits_implementation::mask_unicode) && (c > 0xff)) + return true; + return false; +} +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, cutl_details_boost::uint32_t m, unsigned short c) +{ + WORD mask; + if(::GetStringTypeExW(idx, CT_CTYPE1, (wchar_t const*)&c, 1, &mask) && (mask & m & w32_regex_traits_implementation::mask_base)) + return true; + if((m & w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + if((m & w32_regex_traits_implementation::mask_unicode) && (c > 0xff)) + return true; + return false; +} +#endif +#endif + +} // re_detail +} // boost + +#endif + diff --git a/cutl/details/boost/regex/src/wc_regex_traits.cxx b/cutl/details/boost/regex/src/wc_regex_traits.cxx new file mode 100644 index 0000000..2c6b44b --- /dev/null +++ b/cutl/details/boost/regex/src/wc_regex_traits.cxx @@ -0,0 +1,320 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: wc_regex_traits.cpp + * VERSION: see + * DESCRIPTION: Implements out of line members for c_regex_traits + */ + + +#define BOOST_REGEX_SOURCE + +#include +#include +#include + +#if defined(_DLL_CPPLIB) && !defined(_M_CEE_PURE) && defined(_NATIVE_WCHAR_T_DEFINED) \ + && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION) || defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER))\ + && BOOST_WORKAROUND(BOOST_MSVC, <1600) +// +// This is a horrible workaround, but without declaring these symbols extern we get +// duplicate symbol errors when linking if the application is built without +// /Zc:wchar_t +// +#ifdef _CRTIMP2_PURE +# define BOOST_REGEX_STDLIB_DECL _CRTIMP2_PURE +#else +# define BOOST_REGEX_STDLIB_DECL _CRTIMP2 +#endif + +namespace std{ + +#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) +template class BOOST_REGEX_STDLIB_DECL allocator; +template class BOOST_REGEX_STDLIB_DECL _String_val >; +template class BOOST_REGEX_STDLIB_DECL basic_string, allocator >; +#endif + +#if BOOST_WORKAROUND(BOOST_MSVC, > 1300) && BOOST_WORKAROUND(BOOST_MSVC, BOOST_TESTED_AT(1400)) +template<> BOOST_REGEX_STDLIB_DECL std::size_t __cdecl char_traits::length(unsigned short const*); +#endif + +template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( + const basic_string, allocator >&, + const basic_string, allocator >&); +template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( + const unsigned short *, + const basic_string, allocator >&); +template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==( + const basic_string, allocator >&, + const unsigned short *); +template BOOST_REGEX_STDLIB_DECL bool __cdecl operator<( + const basic_string, allocator >&, + const basic_string, allocator >&); +template BOOST_REGEX_STDLIB_DECL bool __cdecl operator>( + const basic_string, allocator >&, + const basic_string, allocator >&); +} +#endif + +#include +#include + +#if !BOOST_WORKAROUND(__BORLANDC__, < 0x560) + +#include +#ifndef BOOST_NO_WREGEX +#include +#include + +#if defined(BOOST_NO_STDC_NAMESPACE) +namespace std{ + using ::wcstol; +} +#endif + +namespace cutl_details_boost{ + +c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform(const wchar_t* p1, const wchar_t* p2) +{ + std::size_t r; + std::size_t s = 10; + std::wstring src(p1, p2); + std::wstring result(s, L' '); + while(s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s))) + { + result.append(r - s + 3, L' '); + s = result.size(); + } + result.erase(r); + return result; +} + +c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform_primary(const wchar_t* p1, const wchar_t* p2) +{ + static wchar_t s_delim; + static const int s_collate_type = ::cutl_details_boost::re_detail::find_sort_syntax(static_cast*>(0), &s_delim); + std::wstring result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(s_collate_type) + { + case ::cutl_details_boost::re_detail::sort_C: + case ::cutl_details_boost::re_detail::sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + for(std::wstring::size_type i = 0; i < result.size(); ++i) + result[i] = (std::towlower)(result[i]); + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case ::cutl_details_boost::re_detail::sort_fixed: + { + // get a regular sort key, and then truncate it: + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + result.erase(s_delim); + break; + } + case ::cutl_details_boost::re_detail::sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result = c_regex_traits::transform(&*result.begin(), &*result.begin() + result.size()); + if(result.size() && (result[0] == s_delim)) + break; + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == s_delim) + break; + } + result.erase(i); + break; + } + if(result.empty()) + result = std::wstring(1, char(0)); + return result; +} + +enum +{ + char_class_space=1<<0, + char_class_print=1<<1, + char_class_cntrl=1<<2, + char_class_upper=1<<3, + char_class_lower=1<<4, + char_class_alpha=1<<5, + char_class_digit=1<<6, + char_class_punct=1<<7, + char_class_xdigit=1<<8, + char_class_alnum=char_class_alpha|char_class_digit, + char_class_graph=char_class_alnum|char_class_punct, + char_class_blank=1<<9, + char_class_word=1<<10, + char_class_unicode=1<<11, + char_class_horizontal=1<<12, + char_class_vertical=1<<13 +}; + +c_regex_traits::char_class_type BOOST_REGEX_CALL c_regex_traits::lookup_classname(const wchar_t* p1, const wchar_t* p2) +{ + static const char_class_type masks[] = + { + 0, + char_class_alnum, + char_class_alpha, + char_class_blank, + char_class_cntrl, + char_class_digit, + char_class_digit, + char_class_graph, + char_class_horizontal, + char_class_lower, + char_class_lower, + char_class_print, + char_class_punct, + char_class_space, + char_class_space, + char_class_upper, + char_class_unicode, + char_class_upper, + char_class_vertical, + char_class_alnum | char_class_word, + char_class_alnum | char_class_word, + char_class_xdigit, + }; + + int idx = ::cutl_details_boost::re_detail::get_default_class_id(p1, p2); + if(idx < 0) + { + std::wstring s(p1, p2); + for(std::wstring::size_type i = 0; i < s.size(); ++i) + s[i] = (std::towlower)(s[i]); + idx = ::cutl_details_boost::re_detail::get_default_class_id(&*s.begin(), &*s.begin() + s.size()); + } + BOOST_ASSERT(idx+1 < static_cast(sizeof(masks) / sizeof(masks[0]))); + return masks[idx+1]; +} + +bool BOOST_REGEX_CALL c_regex_traits::isctype(wchar_t c, char_class_type mask) +{ + return + ((mask & char_class_space) && (std::iswspace)(c)) + || ((mask & char_class_print) && (std::iswprint)(c)) + || ((mask & char_class_cntrl) && (std::iswcntrl)(c)) + || ((mask & char_class_upper) && (std::iswupper)(c)) + || ((mask & char_class_lower) && (std::iswlower)(c)) + || ((mask & char_class_alpha) && (std::iswalpha)(c)) + || ((mask & char_class_digit) && (std::iswdigit)(c)) + || ((mask & char_class_punct) && (std::iswpunct)(c)) + || ((mask & char_class_xdigit) && (std::iswxdigit)(c)) + || ((mask & char_class_blank) && (std::iswspace)(c) && !::cutl_details_boost::re_detail::is_separator(c)) + || ((mask & char_class_word) && (c == '_')) + || ((mask & char_class_unicode) && (c & ~static_cast(0xff))) + || ((mask & char_class_vertical) && (::cutl_details_boost::re_detail::is_separator(c) || (c == L'\v'))) + || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::cutl_details_boost::re_detail::is_separator(c) && (c != L'\v')); +} + +c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::lookup_collatename(const wchar_t* p1, const wchar_t* p2) +{ +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) + std::string name(p1, p2); +#else + std::string name; + const wchar_t* p0 = p1; + while(p0 != p2) + name.append(1, char(*p0++)); +#endif + name = ::cutl_details_boost::re_detail::lookup_default_collate_name(name); +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) + if(name.size()) + return string_type(name.begin(), name.end()); +#else + if(name.size()) + { + string_type result; + typedef std::string::const_iterator iter; + iter b = name.begin(); + iter e = name.end(); + while(b != e) + result.append(1, wchar_t(*b++)); + return result; + } +#endif + if(p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +int BOOST_REGEX_CALL c_regex_traits::value(wchar_t c, int radix) +{ +#ifdef __BORLANDC__ + // workaround for broken wcstol: + if((std::iswxdigit)(c) == 0) + return -1; +#endif + wchar_t b[2] = { c, '\0', }; + wchar_t* ep; + int result = std::wcstol(b, &ep, radix); + if(ep == b) + return -1; + return result; +} + +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform(const unsigned short* p1, const unsigned short* p2) +{ + std::wstring result = c_regex_traits::transform((const wchar_t*)p1, (const wchar_t*)p2); + return string_type(result.begin(), result.end()); +} + +c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::transform_primary(const unsigned short* p1, const unsigned short* p2) +{ + std::wstring result = c_regex_traits::transform_primary((const wchar_t*)p1, (const wchar_t*)p2); + return string_type(result.begin(), result.end()); +} + +c_regex_traits::char_class_type BOOST_REGEX_CALL c_regex_traits::lookup_classname(const unsigned short* p1, const unsigned short* p2) +{ + return c_regex_traits::lookup_classname((const wchar_t*)p1, (const wchar_t*)p2); +} + +c_regex_traits::string_type BOOST_REGEX_CALL c_regex_traits::lookup_collatename(const unsigned short* p1, const unsigned short* p2) +{ + std::wstring result = c_regex_traits::lookup_collatename((const wchar_t*)p1, (const wchar_t*)p2); + return string_type(result.begin(), result.end()); +} + +bool BOOST_REGEX_CALL c_regex_traits::isctype(unsigned short c, char_class_type m) +{ + return c_regex_traits::isctype(c, m); +} + +int BOOST_REGEX_CALL c_regex_traits::value(unsigned short c, int radix) +{ + return c_regex_traits::value(c, radix); +} + +#endif + +} + +#endif // BOOST_NO_WREGEX + +#endif // __BORLANDC__ + diff --git a/cutl/details/boost/regex/src/wide_posix_api.cxx b/cutl/details/boost/regex/src/wide_posix_api.cxx new file mode 100644 index 0000000..d132480 --- /dev/null +++ b/cutl/details/boost/regex/src/wide_posix_api.cxx @@ -0,0 +1,315 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: wide_posix_api.cpp + * VERSION: see + * DESCRIPTION: Implements the wide character POSIX API wrappers. + */ + +#define BOOST_REGEX_SOURCE + +#include + +#ifndef BOOST_NO_WREGEX + +#include +#include + +#include +#include +#include + +#ifdef BOOST_INTEL +#pragma warning(disable:981) +#endif + +#if defined(BOOST_NO_STDC_NAMESPACE) || defined(__NetBSD__) +namespace std{ +# ifndef BOOST_NO_SWPRINTF + using ::swprintf; +# endif +} +#endif + + +namespace cutl_details_boost{ + +namespace { + +unsigned int wmagic_value = 28631; + +const wchar_t* wnames[] = { + L"REG_NOERROR", + L"REG_NOMATCH", + L"REG_BADPAT", + L"REG_ECOLLATE", + L"REG_ECTYPE", + L"REG_EESCAPE", + L"REG_ESUBREG", + L"REG_EBRACK", + L"REG_EPAREN", + L"REG_EBRACE", + L"REG_BADBR", + L"REG_ERANGE", + L"REG_ESPACE", + L"REG_BADRPT", + L"REG_EEND", + L"REG_ESIZE", + L"REG_ERPAREN", + L"REG_EMPTY", + L"REG_ECOMPLEXITY", + L"REG_ESTACK", + L"REG_E_PERL", + L"REG_E_UNKNOWN", +}; +} + +typedef cutl_details_boost::basic_regex > c_regex_type; + +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompW(regex_tW* expression, const wchar_t* ptr, int f) +{ + if(expression->re_magic != wmagic_value) + { + expression->guts = 0; +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + expression->guts = new c_regex_type(); +#ifndef BOOST_NO_EXCEPTIONS + } catch(...) + { + return REG_ESPACE; + } +#else + if(0 == expression->guts) + return REG_E_MEMORY; +#endif + } + // set default flags: + cutl_details_boost::uint_fast32_t flags = (f & REG_PERLEX) ? 0 : ((f & REG_EXTENDED) ? wregex::extended : wregex::basic); + expression->eflags = (f & REG_NEWLINE) ? match_not_dot_newline : match_default; + + // and translate those that are actually set: + if(f & REG_NOCOLLATE) + { + flags |= wregex::nocollate; +#ifndef BOOST_REGEX_V3 + flags &= ~wregex::collate; +#endif + } + + if(f & REG_NOSUB) + { + //expression->eflags |= match_any; + flags |= wregex::nosubs; + } + + if(f & REG_NOSPEC) + flags |= wregex::literal; + if(f & REG_ICASE) + flags |= wregex::icase; + if(f & REG_ESCAPE_IN_LISTS) + flags &= ~wregex::no_escape_in_lists; + if(f & REG_NEWLINE_ALT) + flags |= wregex::newline_alt; + + const wchar_t* p2; + if(f & REG_PEND) + p2 = expression->re_endp; + else p2 = ptr + std::wcslen(ptr); + + int result; + +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + expression->re_magic = wmagic_value; + static_cast(expression->guts)->set_expression(ptr, p2, flags); + expression->re_nsub = static_cast(expression->guts)->mark_count() - 1; + result = static_cast(expression->guts)->error_code(); +#ifndef BOOST_NO_EXCEPTIONS + } + catch(const cutl_details_boost::regex_error& be) + { + result = be.code(); + } + catch(...) + { + result = REG_E_UNKNOWN; + } +#endif + if(result) + regfreeW(expression); + return result; + +} + +BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorW(int code, const regex_tW* e, wchar_t* buf, regsize_t buf_size) +{ + std::size_t result = 0; + if(code & REG_ITOA) + { + code &= ~REG_ITOA; + if((code <= (int)REG_E_UNKNOWN) && (code >= 0)) + { + result = std::wcslen(wnames[code]) + 1; + if(buf_size >= result) +#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE) + ::wcscpy_s(buf, buf_size, wnames[code]); +#else + std::wcscpy(buf, wnames[code]); +#endif + return result; + } + return result; + } +#if !defined(BOOST_NO_SWPRINTF) + if(code == REG_ATOI) + { + wchar_t localbuf[5]; + if(e == 0) + return 0; + for(int i = 0; i <= (int)REG_E_UNKNOWN; ++i) + { + if(std::wcscmp(e->re_endp, wnames[i]) == 0) + { +#if defined(_WIN32_WCE) && !defined(UNDER_CE) + (std::swprintf)(localbuf, L"%d", i); +#else + (std::swprintf)(localbuf, 5, L"%d", i); +#endif + if(std::wcslen(localbuf) < buf_size) +#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE) + ::wcscpy_s(buf, buf_size, localbuf); +#else + std::wcscpy(buf, localbuf); +#endif + return std::wcslen(localbuf) + 1; + } + } +#if defined(_WIN32_WCE) && !defined(UNDER_CE) + (std::swprintf)(localbuf, L"%d", 0); +#else + (std::swprintf)(localbuf, 5, L"%d", 0); +#endif + if(std::wcslen(localbuf) < buf_size) +#if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE) + ::wcscpy_s(buf, buf_size, localbuf); +#else + std::wcscpy(buf, localbuf); +#endif + return std::wcslen(localbuf) + 1; + } +#endif + if(code <= (int)REG_E_UNKNOWN) + { + std::string p; + if((e) && (e->re_magic == wmagic_value)) + p = static_cast(e->guts)->get_traits().error_string(static_cast< ::cutl_details_boost::regex_constants::error_type>(code)); + else + { + p = re_detail::get_default_error_string(static_cast< ::cutl_details_boost::regex_constants::error_type>(code)); + } + std::size_t len = p.size(); + if(len < buf_size) + { + re_detail::copy(p.c_str(), p.c_str() + p.size() + 1, buf); + } + return len + 1; + } + if(buf_size) + *buf = 0; + return 0; +} + +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecW(const regex_tW* expression, const wchar_t* buf, regsize_t n, regmatch_t* array, int eflags) +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4267) +#endif + bool result = false; + match_flag_type flags = match_default | expression->eflags; + const wchar_t* end; + const wchar_t* start; + wcmatch m; + + if(eflags & REG_NOTBOL) + flags |= match_not_bol; + if(eflags & REG_NOTEOL) + flags |= match_not_eol; + if(eflags & REG_STARTEND) + { + start = buf + array[0].rm_so; + end = buf + array[0].rm_eo; + } + else + { + start = buf; + end = buf + std::wcslen(buf); + } + +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + if(expression->re_magic == wmagic_value) + { + result = regex_search(start, end, m, *static_cast(expression->guts), flags); + } + else + return result; +#ifndef BOOST_NO_EXCEPTIONS + } catch(...) + { + return REG_E_UNKNOWN; + } +#endif + if(result) + { + // extract what matched: + std::size_t i; + for(i = 0; (i < n) && (i < expression->re_nsub + 1); ++i) + { + array[i].rm_so = (m[i].matched == false) ? -1 : (m[i].first - buf); + array[i].rm_eo = (m[i].matched == false) ? -1 : (m[i].second - buf); + } + // and set anything else to -1: + for(i = expression->re_nsub + 1; i < n; ++i) + { + array[i].rm_so = -1; + array[i].rm_eo = -1; + } + return 0; + } + return REG_NOMATCH; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeW(regex_tW* expression) +{ + if(expression->re_magic == wmagic_value) + { + delete static_cast(expression->guts); + } + expression->re_magic = 0; +} + +} // namespace cutl_details_boost; + +#endif + + + + diff --git a/cutl/details/boost/regex/src/winstances.cxx b/cutl/details/boost/regex/src/winstances.cxx new file mode 100644 index 0000000..19ba8a4 --- /dev/null +++ b/cutl/details/boost/regex/src/winstances.cxx @@ -0,0 +1,35 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: winstances.cpp + * VERSION: see + * DESCRIPTION: regex wide character template instances. + */ + +#define BOOST_REGEX_SOURCE + +#include + +#if !defined(BOOST_NO_WREGEX) && !defined(BOOST_REGEX_NO_EXTERNAL_TEMPLATES) +#define BOOST_REGEX_WIDE_INSTANTIATE + +#ifdef __BORLANDC__ +#pragma hrdstop +#endif + +#include + +#endif + + + diff --git a/cutl/details/boost/regex/user.hpp b/cutl/details/boost/regex/user.hpp new file mode 100644 index 0000000..b4ab5c4 --- /dev/null +++ b/cutl/details/boost/regex/user.hpp @@ -0,0 +1,90 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE user.hpp + * VERSION see + * DESCRIPTION: User settable options. + */ + +// define if you want the regex library to use the C locale +// even on Win32: +// #define BOOST_REGEX_USE_C_LOCALE + +// define this is you want the regex library to use the C++ +// locale: +// #define BOOST_REGEX_USE_CPP_LOCALE + +// define this if the runtime library is a dll, and you +// want BOOST_REGEX_DYN_LINK to set up dll exports/imports +// with __declspec(dllexport)/__declspec(dllimport.) +// #define BOOST_REGEX_HAS_DLL_RUNTIME + +// define this if you want to dynamically link to regex, +// if the runtime library is also a dll (Probably Win32 specific, +// and has no effect unless BOOST_REGEX_HAS_DLL_RUNTIME is set): +// #define BOOST_REGEX_DYN_LINK + +// define this if you don't want the lib to automatically +// select its link libraries: +// #define BOOST_REGEX_NO_LIB + +// define this if templates with switch statements cause problems: +// #define BOOST_REGEX_NO_TEMPLATE_SWITCH_MERGE + +// define this to disable Win32 support when available: +// #define BOOST_REGEX_NO_W32 + +// define this if bool is not a real type: +// #define BOOST_REGEX_NO_BOOL + +// define this if no template instances are to be placed in +// the library rather than users object files: +// #define BOOST_REGEX_NO_EXTERNAL_TEMPLATES + +// define this if the forward declarations in regex_fwd.hpp +// cause more problems than they are worth: +// #define BOOST_REGEX_NO_FWD + +// define this if your compiler supports MS Windows structured +// exception handling. +// #define BOOST_REGEX_HAS_MS_STACK_GUARD + +// define this if you want to use the recursive algorithm +// even if BOOST_REGEX_HAS_MS_STACK_GUARD is not defined. +// #define BOOST_REGEX_RECURSIVE + +// define this if you want to use the non-recursive +// algorithm, even if the recursive version would be the default. +// #define BOOST_REGEX_NON_RECURSIVE + +// define this if you want to set the size of the memory blocks +// used by the non-recursive algorithm. +// #define BOOST_REGEX_BLOCKSIZE 4096 + +// define this if you want to set the maximum number of memory blocks +// used by the non-recursive algorithm. +// #define BOOST_REGEX_MAX_BLOCKS 1024 + +// define this if you want to set the maximum number of memory blocks +// cached by the non-recursive algorithm: Normally this is 16, but can be +// higher if you have multiple threads all using boost.regex, or lower +// if you don't want boost.regex to cache memory. +// #define BOOST_REGEX_MAX_CACHE_BLOCKS 16 + +// define this if you want to be able to access extended capture +// information in your sub_match's (caution this will slow things +// down quite a bit). +// #define BOOST_REGEX_MATCH_EXTRA + +// define this if you want to enable support for Unicode via ICU. +// #define BOOST_HAS_ICU diff --git a/cutl/details/boost/regex/v4/basic_regex.hpp b/cutl/details/boost/regex/v4/basic_regex.hpp new file mode 100644 index 0000000..d55cea0 --- /dev/null +++ b/cutl/details/boost/regex/v4/basic_regex.hpp @@ -0,0 +1,779 @@ +/* + * + * Copyright (c) 1998-2004 + * John Maddock + * + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org/ for most recent version. + * FILE basic_regex.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex. + */ + +#ifndef BOOST_REGEX_V4_BASIC_REGEX_HPP +#define BOOST_REGEX_V4_BASIC_REGEX_HPP + +#include +#include + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable : 4251 4231 4660 4800) +#endif + +namespace re_detail{ + +// +// forward declaration, we will need this one later: +// +template +class basic_regex_parser; + +template +void bubble_down_one(I first, I last) +{ + if(first != last) + { + I next = last - 1; + while((next != first) && (*next < *(next-1))) + { + (next-1)->swap(*next); + --next; + } + } +} + +template +inline int hash_value_from_capture_name(Iterator i, Iterator j) +{ + std::size_t r = cutl_details_boost::hash_range(i, j); + r %= ((std::numeric_limits::max)() - 10001); + r += 10000; + return static_cast(r); +} + +class named_subexpressions +{ +public: + struct name + { + template + name(const charT* i, const charT* j, int idx) + : index(idx) + { + hash = hash_value_from_capture_name(i, j); + } + name(int h, int idx) + : index(idx), hash(h) + { + } + int index; + int hash; + bool operator < (const name& other)const + { + return hash < other.hash; + } + bool operator == (const name& other)const + { + return hash == other.hash; + } + void swap(name& other) + { + std::swap(index, other.index); + std::swap(hash, other.hash); + } + }; + + typedef std::vector::const_iterator const_iterator; + typedef std::pair range_type; + + named_subexpressions(){} + + template + void set_name(const charT* i, const charT* j, int index) + { + m_sub_names.push_back(name(i, j, index)); + bubble_down_one(m_sub_names.begin(), m_sub_names.end()); + } + template + int get_id(const charT* i, const charT* j)const + { + name t(i, j, 0); + typename std::vector::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } + template + range_type equal_range(const charT* i, const charT* j)const + { + name t(i, j, 0); + return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t); + } + int get_id(int h)const + { + name t(h, 0); + std::vector::const_iterator pos = std::lower_bound(m_sub_names.begin(), m_sub_names.end(), t); + if((pos != m_sub_names.end()) && (*pos == t)) + { + return pos->index; + } + return -1; + } + range_type equal_range(int h)const + { + name t(h, 0); + return std::equal_range(m_sub_names.begin(), m_sub_names.end(), t); + } +private: + std::vector m_sub_names; +}; + +// +// class regex_data: +// represents the data we wish to expose to the matching algorithms. +// +template +struct regex_data : public named_subexpressions +{ + typedef regex_constants::syntax_option_type flag_type; + typedef std::size_t size_type; + + regex_data(const ::cutl_details_boost::shared_ptr< + ::cutl_details_boost::regex_traits_wrapper >& t) + : m_ptraits(t), m_expression(0), m_expression_len(0) {} + regex_data() + : m_ptraits(new ::cutl_details_boost::regex_traits_wrapper()), m_expression(0), m_expression_len(0) {} + + ::cutl_details_boost::shared_ptr< + ::cutl_details_boost::regex_traits_wrapper + > m_ptraits; // traits class instance + flag_type m_flags; // flags with which we were compiled + int m_status; // error code (0 implies OK). + const charT* m_expression; // the original expression + std::ptrdiff_t m_expression_len; // the length of the original expression + size_type m_mark_count; // the number of marked sub-expressions + re_detail::re_syntax_base* m_first_state; // the first state of the machine + unsigned m_restart_type; // search optimisation type + unsigned char m_startmap[1 << CHAR_BIT]; // which characters can start a match + unsigned int m_can_be_null; // whether we can match a null string + re_detail::raw_storage m_data; // the buffer in which our states are constructed + typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character + std::vector< + std::pair< + std::size_t, std::size_t> > m_subs; // Position of sub-expressions within the *string*. + bool m_has_recursions; // whether we have recursive expressions; +}; +// +// class basic_regex_implementation +// pimpl implementation class for basic_regex. +// +template +class basic_regex_implementation + : public regex_data +{ +public: + typedef regex_constants::syntax_option_type flag_type; + typedef std::ptrdiff_t difference_type; + typedef std::size_t size_type; + typedef typename traits::locale_type locale_type; + typedef const charT* const_iterator; + + basic_regex_implementation(){} + basic_regex_implementation(const ::cutl_details_boost::shared_ptr< + ::cutl_details_boost::regex_traits_wrapper >& t) + : regex_data(t) {} + void assign(const charT* arg_first, + const charT* arg_last, + flag_type f) + { + regex_data* pdat = this; + basic_regex_parser parser(pdat); + parser.parse(arg_first, arg_last, f); + } + + locale_type BOOST_REGEX_CALL imbue(locale_type l) + { + return this->m_ptraits->imbue(l); + } + locale_type BOOST_REGEX_CALL getloc()const + { + return this->m_ptraits->getloc(); + } + std::basic_string BOOST_REGEX_CALL str()const + { + std::basic_string result; + if(this->m_status == 0) + result = std::basic_string(this->m_expression, this->m_expression_len); + return result; + } + const_iterator BOOST_REGEX_CALL expression()const + { + return this->m_expression; + } + std::pair BOOST_REGEX_CALL subexpression(std::size_t n)const + { + if(n == 0) + throw std::out_of_range("0 is not a valid subexpression index."); + const std::pair& pi = this->m_subs.at(n - 1); + std::pair p(expression() + pi.first, expression() + pi.second); + return p; + } + // + // begin, end: + const_iterator BOOST_REGEX_CALL begin()const + { + return (!this->m_status ? 0 : this->m_expression); + } + const_iterator BOOST_REGEX_CALL end()const + { + return (!this->m_status ? 0 : this->m_expression + this->m_expression_len); + } + flag_type BOOST_REGEX_CALL flags()const + { + return this->m_flags; + } + size_type BOOST_REGEX_CALL size()const + { + return this->m_expression_len; + } + int BOOST_REGEX_CALL status()const + { + return this->m_status; + } + size_type BOOST_REGEX_CALL mark_count()const + { + return this->m_mark_count; + } + const re_detail::re_syntax_base* get_first_state()const + { + return this->m_first_state; + } + unsigned get_restart_type()const + { + return this->m_restart_type; + } + const unsigned char* get_map()const + { + return this->m_startmap; + } + const ::cutl_details_boost::regex_traits_wrapper& get_traits()const + { + return *(this->m_ptraits); + } + bool can_be_null()const + { + return this->m_can_be_null; + } + const regex_data& get_data()const + { + basic_regex_implementation const* p = this; + return *static_cast*>(p); + } +}; + +} // namespace re_detail +// +// class basic_regex: +// represents the compiled +// regular expression: +// + +#ifdef BOOST_REGEX_NO_FWD +template > +#else +template +#endif +class basic_regex : public regbase +{ +public: + // typedefs: + typedef std::size_t traits_size_type; + typedef typename traits::string_type traits_string_type; + typedef charT char_type; + typedef traits traits_type; + + typedef charT value_type; + typedef charT& reference; + typedef const charT& const_reference; + typedef const charT* const_iterator; + typedef const_iterator iterator; + typedef std::ptrdiff_t difference_type; + typedef std::size_t size_type; + typedef regex_constants::syntax_option_type flag_type; + // locale_type + // placeholder for actual locale type used by the + // traits class to localise *this. + typedef typename traits::locale_type locale_type; + +public: + explicit basic_regex(){} + explicit basic_regex(const charT* p, flag_type f = regex_constants::normal) + { + assign(p, f); + } + basic_regex(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + { + assign(p1, p2, f); + } + basic_regex(const charT* p, size_type len, flag_type f) + { + assign(p, len, f); + } + basic_regex(const basic_regex& that) + : m_pimpl(that.m_pimpl) {} + ~basic_regex(){} + basic_regex& BOOST_REGEX_CALL operator=(const basic_regex& that) + { + return assign(that); + } + basic_regex& BOOST_REGEX_CALL operator=(const charT* ptr) + { + return assign(ptr); + } + + // + // assign: + basic_regex& assign(const basic_regex& that) + { + m_pimpl = that.m_pimpl; + return *this; + } + basic_regex& assign(const charT* p, flag_type f = regex_constants::normal) + { + return assign(p, p + traits::length(p), f); + } + basic_regex& assign(const charT* p, size_type len, flag_type f) + { + return assign(p, p + len, f); + } +private: + basic_regex& do_assign(const charT* p1, + const charT* p2, + flag_type f); +public: + basic_regex& assign(const charT* p1, + const charT* p2, + flag_type f = regex_constants::normal) + { + return do_assign(p1, p2, f); + } +#if !defined(BOOST_NO_MEMBER_TEMPLATES) + + template + unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string& p, flag_type f = regex_constants::normal) + { + return set_expression(p.data(), p.data() + p.size(), f); + } + + template + explicit basic_regex(const std::basic_string& p, flag_type f = regex_constants::normal) + { + assign(p, f); + } + + template + basic_regex(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal) + { + typedef typename traits::string_type seq_type; + seq_type a(arg_first, arg_last); + if(a.size()) + assign(&*a.begin(), &*a.begin() + a.size(), f); + else + assign(static_cast(0), static_cast(0), f); + } + + template + basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string& p) + { + return assign(p.data(), p.data() + p.size(), regex_constants::normal); + } + + template + basic_regex& BOOST_REGEX_CALL assign( + const std::basic_string& s, + flag_type f = regex_constants::normal) + { + return assign(s.data(), s.data() + s.size(), f); + } + + template + basic_regex& BOOST_REGEX_CALL assign(InputIterator arg_first, + InputIterator arg_last, + flag_type f = regex_constants::normal) + { + typedef typename traits::string_type seq_type; + seq_type a(arg_first, arg_last); + if(a.size()) + { + const charT* p1 = &*a.begin(); + const charT* p2 = &*a.begin() + a.size(); + return assign(p1, p2, f); + } + return assign(static_cast(0), static_cast(0), f); + } +#else + unsigned int BOOST_REGEX_CALL set_expression(const std::basic_string& p, flag_type f = regex_constants::normal) + { + return set_expression(p.data(), p.data() + p.size(), f); + } + + basic_regex(const std::basic_string& p, flag_type f = regex_constants::normal) + { + assign(p, f); + } + + basic_regex& BOOST_REGEX_CALL operator=(const std::basic_string& p) + { + return assign(p.data(), p.data() + p.size(), regex_constants::normal); + } + + basic_regex& BOOST_REGEX_CALL assign( + const std::basic_string& s, + flag_type f = regex_constants::normal) + { + return assign(s.data(), s.data() + s.size(), f); + } + +#endif + + // + // locale: + locale_type BOOST_REGEX_CALL imbue(locale_type l); + locale_type BOOST_REGEX_CALL getloc()const + { + return m_pimpl.get() ? m_pimpl->getloc() : locale_type(); + } + // + // getflags: + // retained for backwards compatibility only, "flags" + // is now the preferred name: + flag_type BOOST_REGEX_CALL getflags()const + { + return flags(); + } + flag_type BOOST_REGEX_CALL flags()const + { + return m_pimpl.get() ? m_pimpl->flags() : 0; + } + // + // str: + std::basic_string BOOST_REGEX_CALL str()const + { + return m_pimpl.get() ? m_pimpl->str() : std::basic_string(); + } + // + // begin, end, subexpression: + std::pair BOOST_REGEX_CALL subexpression(std::size_t n)const + { + if(!m_pimpl.get()) + throw std::logic_error("Can't access subexpressions in an invalid regex."); + return m_pimpl->subexpression(n); + } + const_iterator BOOST_REGEX_CALL begin()const + { + return (m_pimpl.get() ? m_pimpl->begin() : 0); + } + const_iterator BOOST_REGEX_CALL end()const + { + return (m_pimpl.get() ? m_pimpl->end() : 0); + } + // + // swap: + void BOOST_REGEX_CALL swap(basic_regex& that)throw() + { + m_pimpl.swap(that.m_pimpl); + } + // + // size: + size_type BOOST_REGEX_CALL size()const + { + return (m_pimpl.get() ? m_pimpl->size() : 0); + } + // + // max_size: + size_type BOOST_REGEX_CALL max_size()const + { + return UINT_MAX; + } + // + // empty: + bool BOOST_REGEX_CALL empty()const + { + return (m_pimpl.get() ? 0 != m_pimpl->status() : true); + } + + size_type BOOST_REGEX_CALL mark_count()const + { + return (m_pimpl.get() ? m_pimpl->mark_count() : 0); + } + + int status()const + { + return (m_pimpl.get() ? m_pimpl->status() : regex_constants::error_empty); + } + + int BOOST_REGEX_CALL compare(const basic_regex& that) const + { + if(m_pimpl.get() == that.m_pimpl.get()) + return 0; + if(!m_pimpl.get()) + return -1; + if(!that.m_pimpl.get()) + return 1; + if(status() != that.status()) + return status() - that.status(); + if(flags() != that.flags()) + return flags() - that.flags(); + return str().compare(that.str()); + } + bool BOOST_REGEX_CALL operator==(const basic_regex& e)const + { + return compare(e) == 0; + } + bool BOOST_REGEX_CALL operator != (const basic_regex& e)const + { + return compare(e) != 0; + } + bool BOOST_REGEX_CALL operator<(const basic_regex& e)const + { + return compare(e) < 0; + } + bool BOOST_REGEX_CALL operator>(const basic_regex& e)const + { + return compare(e) > 0; + } + bool BOOST_REGEX_CALL operator<=(const basic_regex& e)const + { + return compare(e) <= 0; + } + bool BOOST_REGEX_CALL operator>=(const basic_regex& e)const + { + return compare(e) >= 0; + } + + // + // The following are deprecated as public interfaces + // but are available for compatibility with earlier versions. + const charT* BOOST_REGEX_CALL expression()const + { + return (m_pimpl.get() && !m_pimpl->status() ? m_pimpl->expression() : 0); + } + unsigned int BOOST_REGEX_CALL set_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + { + assign(p1, p2, f | regex_constants::no_except); + return status(); + } + unsigned int BOOST_REGEX_CALL set_expression(const charT* p, flag_type f = regex_constants::normal) + { + assign(p, f | regex_constants::no_except); + return status(); + } + unsigned int BOOST_REGEX_CALL error_code()const + { + return status(); + } + // + // private access methods: + // + const re_detail::re_syntax_base* get_first_state()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_first_state(); + } + unsigned get_restart_type()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_restart_type(); + } + const unsigned char* get_map()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_map(); + } + const ::cutl_details_boost::regex_traits_wrapper& get_traits()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_traits(); + } + bool can_be_null()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->can_be_null(); + } + const re_detail::regex_data& get_data()const + { + BOOST_ASSERT(0 != m_pimpl.get()); + return m_pimpl->get_data(); + } + cutl_details_boost::shared_ptr get_named_subs()const + { + return m_pimpl; + } + +private: + shared_ptr > m_pimpl; +}; + +// +// out of line members; +// these are the only members that mutate the basic_regex object, +// and are designed to provide the strong exception guarentee +// (in the event of a throw, the state of the object remains unchanged). +// +template +basic_regex& basic_regex::do_assign(const charT* p1, + const charT* p2, + flag_type f) +{ + shared_ptr > temp; + if(!m_pimpl.get()) + { + temp = shared_ptr >(new re_detail::basic_regex_implementation()); + } + else + { + temp = shared_ptr >(new re_detail::basic_regex_implementation(m_pimpl->m_ptraits)); + } + temp->assign(p1, p2, f); + temp.swap(m_pimpl); + return *this; +} + +template +typename basic_regex::locale_type BOOST_REGEX_CALL basic_regex::imbue(locale_type l) +{ + shared_ptr > temp(new re_detail::basic_regex_implementation()); + locale_type result = temp->imbue(l); + temp.swap(m_pimpl); + return result; +} + +// +// non-members: +// +template +void swap(basic_regex& e1, basic_regex& e2) +{ + e1.swap(e2); +} + +#ifndef BOOST_NO_STD_LOCALE +template +std::basic_ostream& + operator << (std::basic_ostream& os, + const basic_regex& e) +{ + return (os << e.str()); +} +#else +template +std::ostream& operator << (std::ostream& os, const basic_regex& e) +{ + return (os << e.str()); +} +#endif + +// +// class reg_expression: +// this is provided for backwards compatibility only, +// it is deprecated, no not use! +// +#ifdef BOOST_REGEX_NO_FWD +template > +#else +template +#endif +class reg_expression : public basic_regex +{ +public: + typedef typename basic_regex::flag_type flag_type; + typedef typename basic_regex::size_type size_type; + explicit reg_expression(){} + explicit reg_expression(const charT* p, flag_type f = regex_constants::normal) + : basic_regex(p, f){} + reg_expression(const charT* p1, const charT* p2, flag_type f = regex_constants::normal) + : basic_regex(p1, p2, f){} + reg_expression(const charT* p, size_type len, flag_type f) + : basic_regex(p, len, f){} + reg_expression(const reg_expression& that) + : basic_regex(that) {} + ~reg_expression(){} + reg_expression& BOOST_REGEX_CALL operator=(const reg_expression& that) + { + return this->assign(that); + } + +#if !defined(BOOST_NO_MEMBER_TEMPLATES) + template + explicit reg_expression(const std::basic_string& p, flag_type f = regex_constants::normal) + : basic_regex(p, f) + { + } + + template + reg_expression(InputIterator arg_first, InputIterator arg_last, flag_type f = regex_constants::normal) + : basic_regex(arg_first, arg_last, f) + { + } + + template + reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string& p) + { + this->assign(p); + return *this; + } +#else + explicit reg_expression(const std::basic_string& p, flag_type f = regex_constants::normal) + : basic_regex(p, f) + { + } + + reg_expression& BOOST_REGEX_CALL operator=(const std::basic_string& p) + { + this->assign(p); + return *this; + } +#endif + +}; + +#ifdef BOOST_MSVC +#pragma warning (pop) +#endif + +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/cutl/details/boost/regex/v4/basic_regex_creator.hpp b/cutl/details/boost/regex/v4/basic_regex_creator.hpp new file mode 100644 index 0000000..a4599f1 --- /dev/null +++ b/cutl/details/boost/regex/v4/basic_regex_creator.hpp @@ -0,0 +1,1560 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE basic_regex_creator.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex_creator which fills in + * the data members of a regex_data object. + */ + +#ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP +#define BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace cutl_details_boost{ + +namespace re_detail{ + +template +struct digraph : public std::pair +{ + digraph() : std::pair(0, 0){} + digraph(charT c1) : std::pair(c1, 0){} + digraph(charT c1, charT c2) : std::pair(c1, c2) + {} +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300) + digraph(const digraph& d) : std::pair(d.first, d.second){} +#endif + template + digraph(const Seq& s) : std::pair() + { + BOOST_ASSERT(s.size() <= 2); + BOOST_ASSERT(s.size()); + this->first = s[0]; + this->second = (s.size() > 1) ? s[1] : 0; + } +}; + +template +class basic_char_set +{ +public: + typedef digraph digraph_type; + typedef typename traits::string_type string_type; + typedef typename traits::char_class_type mask_type; + + basic_char_set() + { + m_negate = false; + m_has_digraphs = false; + m_classes = 0; + m_negated_classes = 0; + m_empty = true; + } + + void add_single(const digraph_type& s) + { + m_singles.insert(m_singles.end(), s); + if(s.second) + m_has_digraphs = true; + m_empty = false; + } + void add_range(const digraph_type& first, const digraph_type& end) + { + m_ranges.insert(m_ranges.end(), first); + m_ranges.insert(m_ranges.end(), end); + if(first.second) + { + m_has_digraphs = true; + add_single(first); + } + if(end.second) + { + m_has_digraphs = true; + add_single(end); + } + m_empty = false; + } + void add_class(mask_type m) + { + m_classes |= m; + m_empty = false; + } + void add_negated_class(mask_type m) + { + m_negated_classes |= m; + m_empty = false; + } + void add_equivalent(const digraph_type& s) + { + m_equivalents.insert(m_equivalents.end(), s); + if(s.second) + { + m_has_digraphs = true; + add_single(s); + } + m_empty = false; + } + void negate() + { + m_negate = true; + //m_empty = false; + } + + // + // accessor functions: + // + bool has_digraphs()const + { + return m_has_digraphs; + } + bool is_negated()const + { + return m_negate; + } + typedef typename std::vector::const_iterator list_iterator; + list_iterator singles_begin()const + { + return m_singles.begin(); + } + list_iterator singles_end()const + { + return m_singles.end(); + } + list_iterator ranges_begin()const + { + return m_ranges.begin(); + } + list_iterator ranges_end()const + { + return m_ranges.end(); + } + list_iterator equivalents_begin()const + { + return m_equivalents.begin(); + } + list_iterator equivalents_end()const + { + return m_equivalents.end(); + } + mask_type classes()const + { + return m_classes; + } + mask_type negated_classes()const + { + return m_negated_classes; + } + bool empty()const + { + return m_empty; + } +private: + std::vector m_singles; // a list of single characters to match + std::vector m_ranges; // a list of end points of our ranges + bool m_negate; // true if the set is to be negated + bool m_has_digraphs; // true if we have digraphs present + mask_type m_classes; // character classes to match + mask_type m_negated_classes; // negated character classes to match + bool m_empty; // whether we've added anything yet + std::vector m_equivalents; // a list of equivalence classes +}; + +template +class basic_regex_creator +{ +public: + basic_regex_creator(regex_data* data); + std::ptrdiff_t getoffset(void* addr) + { + return getoffset(addr, m_pdata->m_data.data()); + } + std::ptrdiff_t getoffset(const void* addr, const void* base) + { + return static_cast(addr) - static_cast(base); + } + re_syntax_base* getaddress(std::ptrdiff_t off) + { + return getaddress(off, m_pdata->m_data.data()); + } + re_syntax_base* getaddress(std::ptrdiff_t off, void* base) + { + return static_cast(static_cast(static_cast(base) + off)); + } + void init(unsigned l_flags) + { + m_pdata->m_flags = l_flags; + m_icase = l_flags & regex_constants::icase; + } + regbase::flag_type flags() + { + return m_pdata->m_flags; + } + void flags(regbase::flag_type f) + { + m_pdata->m_flags = f; + if(m_icase != static_cast(f & regbase::icase)) + { + m_icase = static_cast(f & regbase::icase); + } + } + re_syntax_base* append_state(syntax_element_type t, std::size_t s = sizeof(re_syntax_base)); + re_syntax_base* insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s = sizeof(re_syntax_base)); + re_literal* append_literal(charT c); + re_syntax_base* append_set(const basic_char_set& char_set); + re_syntax_base* append_set(const basic_char_set& char_set, mpl::false_*); + re_syntax_base* append_set(const basic_char_set& char_set, mpl::true_*); + void finalize(const charT* p1, const charT* p2); +protected: + regex_data* m_pdata; // pointer to the basic_regex_data struct we are filling in + const ::cutl_details_boost::regex_traits_wrapper& + m_traits; // convenience reference to traits class + re_syntax_base* m_last_state; // the last state we added + bool m_icase; // true for case insensitive matches + unsigned m_repeater_id; // the state_id of the next repeater + bool m_has_backrefs; // true if there are actually any backrefs + unsigned m_backrefs; // bitmask of permitted backrefs + cutl_details_boost::uintmax_t m_bad_repeats; // bitmask of repeats we can't deduce a startmap for; + bool m_has_recursions; // set when we have recursive expresisons to fixup + typename traits::char_class_type m_word_mask; // mask used to determine if a character is a word character + typename traits::char_class_type m_mask_space; // mask used to determine if a character is a word character + typename traits::char_class_type m_lower_mask; // mask used to determine if a character is a lowercase character + typename traits::char_class_type m_upper_mask; // mask used to determine if a character is an uppercase character + typename traits::char_class_type m_alpha_mask; // mask used to determine if a character is an alphabetic character +private: + basic_regex_creator& operator=(const basic_regex_creator&); + basic_regex_creator(const basic_regex_creator&); + + void fixup_pointers(re_syntax_base* state); + void fixup_recursions(re_syntax_base* state); + void create_startmaps(re_syntax_base* state); + int calculate_backstep(re_syntax_base* state); + void create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask); + unsigned get_restart_type(re_syntax_base* state); + void set_all_masks(unsigned char* bits, unsigned char); + bool is_bad_repeat(re_syntax_base* pt); + void set_bad_repeat(re_syntax_base* pt); + syntax_element_type get_repeat_type(re_syntax_base* state); + void probe_leading_repeat(re_syntax_base* state); +}; + +template +basic_regex_creator::basic_regex_creator(regex_data* data) + : m_pdata(data), m_traits(*(data->m_ptraits)), m_last_state(0), m_repeater_id(0), m_has_backrefs(false), m_backrefs(0), m_has_recursions(false) +{ + m_pdata->m_data.clear(); + m_pdata->m_status = ::cutl_details_boost::regex_constants::error_ok; + static const charT w = 'w'; + static const charT s = 's'; + static const charT l[5] = { 'l', 'o', 'w', 'e', 'r', }; + static const charT u[5] = { 'u', 'p', 'p', 'e', 'r', }; + static const charT a[5] = { 'a', 'l', 'p', 'h', 'a', }; + m_word_mask = m_traits.lookup_classname(&w, &w +1); + m_mask_space = m_traits.lookup_classname(&s, &s +1); + m_lower_mask = m_traits.lookup_classname(l, l + 5); + m_upper_mask = m_traits.lookup_classname(u, u + 5); + m_alpha_mask = m_traits.lookup_classname(a, a + 5); + m_pdata->m_word_mask = m_word_mask; + BOOST_ASSERT(m_word_mask != 0); + BOOST_ASSERT(m_mask_space != 0); + BOOST_ASSERT(m_lower_mask != 0); + BOOST_ASSERT(m_upper_mask != 0); + BOOST_ASSERT(m_alpha_mask != 0); +} + +template +re_syntax_base* basic_regex_creator::append_state(syntax_element_type t, std::size_t s) +{ + // if the state is a backref then make a note of it: + if(t == syntax_element_backref) + this->m_has_backrefs = true; + // append a new state, start by aligning our last one: + m_pdata->m_data.align(); + // set the offset to the next state in our last one: + if(m_last_state) + m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state); + // now actually extent our data: + m_last_state = static_cast(m_pdata->m_data.extend(s)); + // fill in boilerplate options in the new state: + m_last_state->next.i = 0; + m_last_state->type = t; + return m_last_state; +} + +template +re_syntax_base* basic_regex_creator::insert_state(std::ptrdiff_t pos, syntax_element_type t, std::size_t s) +{ + // append a new state, start by aligning our last one: + m_pdata->m_data.align(); + // set the offset to the next state in our last one: + if(m_last_state) + m_last_state->next.i = m_pdata->m_data.size() - getoffset(m_last_state); + // remember the last state position: + std::ptrdiff_t off = getoffset(m_last_state) + s; + // now actually insert our data: + re_syntax_base* new_state = static_cast(m_pdata->m_data.insert(pos, s)); + // fill in boilerplate options in the new state: + new_state->next.i = s; + new_state->type = t; + m_last_state = getaddress(off); + return new_state; +} + +template +re_literal* basic_regex_creator::append_literal(charT c) +{ + re_literal* result; + // start by seeing if we have an existing re_literal we can extend: + if((0 == m_last_state) || (m_last_state->type != syntax_element_literal)) + { + // no existing re_literal, create a new one: + result = static_cast(append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT))); + result->length = 1; + *static_cast(static_cast(result+1)) = m_traits.translate(c, m_icase); + } + else + { + // we have an existing re_literal, extend it: + std::ptrdiff_t off = getoffset(m_last_state); + m_pdata->m_data.extend(sizeof(charT)); + m_last_state = result = static_cast(getaddress(off)); + charT* characters = static_cast(static_cast(result+1)); + characters[result->length] = m_traits.translate(c, m_icase); + ++(result->length); + } + return result; +} + +template +inline re_syntax_base* basic_regex_creator::append_set( + const basic_char_set& char_set) +{ + typedef mpl::bool_< (sizeof(charT) == 1) > truth_type; + return char_set.has_digraphs() + ? append_set(char_set, static_cast(0)) + : append_set(char_set, static_cast(0)); +} + +template +re_syntax_base* basic_regex_creator::append_set( + const basic_char_set& char_set, mpl::false_*) +{ + typedef typename traits::string_type string_type; + typedef typename basic_char_set::list_iterator item_iterator; + typedef typename traits::char_class_type mask_type; + + re_set_long* result = static_cast*>(append_state(syntax_element_long_set, sizeof(re_set_long))); + // + // fill in the basics: + // + result->csingles = static_cast(::cutl_details_boost::re_detail::distance(char_set.singles_begin(), char_set.singles_end())); + result->cranges = static_cast(::cutl_details_boost::re_detail::distance(char_set.ranges_begin(), char_set.ranges_end())) / 2; + result->cequivalents = static_cast(::cutl_details_boost::re_detail::distance(char_set.equivalents_begin(), char_set.equivalents_end())); + result->cclasses = char_set.classes(); + result->cnclasses = char_set.negated_classes(); + if(flags() & regbase::icase) + { + // adjust classes as needed: + if(((result->cclasses & m_lower_mask) == m_lower_mask) || ((result->cclasses & m_upper_mask) == m_upper_mask)) + result->cclasses |= m_alpha_mask; + if(((result->cnclasses & m_lower_mask) == m_lower_mask) || ((result->cnclasses & m_upper_mask) == m_upper_mask)) + result->cnclasses |= m_alpha_mask; + } + + result->isnot = char_set.is_negated(); + result->singleton = !char_set.has_digraphs(); + // + // remember where the state is for later: + // + std::ptrdiff_t offset = getoffset(result); + // + // now extend with all the singles: + // + item_iterator first, last; + first = char_set.singles_begin(); + last = char_set.singles_end(); + while(first != last) + { + charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (first->second ? 3 : 2))); + p[0] = m_traits.translate(first->first, m_icase); + if(first->second) + { + p[1] = m_traits.translate(first->second, m_icase); + p[2] = 0; + } + else + p[1] = 0; + ++first; + } + // + // now extend with all the ranges: + // + first = char_set.ranges_begin(); + last = char_set.ranges_end(); + while(first != last) + { + // first grab the endpoints of the range: + digraph c1 = *first; + c1.first = this->m_traits.translate(c1.first, this->m_icase); + c1.second = this->m_traits.translate(c1.second, this->m_icase); + ++first; + digraph c2 = *first; + c2.first = this->m_traits.translate(c2.first, this->m_icase); + c2.second = this->m_traits.translate(c2.second, this->m_icase); + ++first; + string_type s1, s2; + // different actions now depending upon whether collation is turned on: + if(flags() & regex_constants::collate) + { + // we need to transform our range into sort keys: +#if BOOST_WORKAROUND(__GNUC__, < 3) + string_type in(3, charT(0)); + in[0] = c1.first; + in[1] = c1.second; + s1 = this->m_traits.transform(in.c_str(), (in[1] ? in.c_str()+2 : in.c_str()+1)); + in[0] = c2.first; + in[1] = c2.second; + s2 = this->m_traits.transform(in.c_str(), (in[1] ? in.c_str()+2 : in.c_str()+1)); +#else + charT a1[3] = { c1.first, c1.second, charT(0), }; + charT a2[3] = { c2.first, c2.second, charT(0), }; + s1 = this->m_traits.transform(a1, (a1[1] ? a1+2 : a1+1)); + s2 = this->m_traits.transform(a2, (a2[1] ? a2+2 : a2+1)); +#endif + if(s1.size() == 0) + s1 = string_type(1, charT(0)); + if(s2.size() == 0) + s2 = string_type(1, charT(0)); + } + else + { + if(c1.second) + { + s1.insert(s1.end(), c1.first); + s1.insert(s1.end(), c1.second); + } + else + s1 = string_type(1, c1.first); + if(c2.second) + { + s2.insert(s2.end(), c2.first); + s2.insert(s2.end(), c2.second); + } + else + s2.insert(s2.end(), c2.first); + } + if(s1 > s2) + { + // Oops error: + return 0; + } + charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (s1.size() + s2.size() + 2) ) ); + re_detail::copy(s1.begin(), s1.end(), p); + p[s1.size()] = charT(0); + p += s1.size() + 1; + re_detail::copy(s2.begin(), s2.end(), p); + p[s2.size()] = charT(0); + } + // + // now process the equivalence classes: + // + first = char_set.equivalents_begin(); + last = char_set.equivalents_end(); + while(first != last) + { + string_type s; + if(first->second) + { +#if BOOST_WORKAROUND(__GNUC__, < 3) + string_type in(3, charT(0)); + in[0] = first->first; + in[1] = first->second; + s = m_traits.transform_primary(in.c_str(), in.c_str()+2); +#else + charT cs[3] = { first->first, first->second, charT(0), }; + s = m_traits.transform_primary(cs, cs+2); +#endif + } + else + s = m_traits.transform_primary(&first->first, &first->first+1); + if(s.empty()) + return 0; // invalid or unsupported equivalence class + charT* p = static_cast(this->m_pdata->m_data.extend(sizeof(charT) * (s.size()+1) ) ); + re_detail::copy(s.begin(), s.end(), p); + p[s.size()] = charT(0); + ++first; + } + // + // finally reset the address of our last state: + // + m_last_state = result = static_cast*>(getaddress(offset)); + return result; +} + +namespace{ + +template +inline bool char_less(T t1, T t2) +{ + return t1 < t2; +} +template<> +inline bool char_less(char t1, char t2) +{ + return static_cast(t1) < static_cast(t2); +} +template<> +inline bool char_less(signed char t1, signed char t2) +{ + return static_cast(t1) < static_cast(t2); +} +} + +template +re_syntax_base* basic_regex_creator::append_set( + const basic_char_set& char_set, mpl::true_*) +{ + typedef typename traits::string_type string_type; + typedef typename basic_char_set::list_iterator item_iterator; + + re_set* result = static_cast(append_state(syntax_element_set, sizeof(re_set))); + bool negate = char_set.is_negated(); + std::memset(result->_map, 0, sizeof(result->_map)); + // + // handle singles first: + // + item_iterator first, last; + first = char_set.singles_begin(); + last = char_set.singles_end(); + while(first != last) + { + for(unsigned int i = 0; i < (1 << CHAR_BIT); ++i) + { + if(this->m_traits.translate(static_cast(i), this->m_icase) + == this->m_traits.translate(first->first, this->m_icase)) + result->_map[i] = true; + } + ++first; + } + // + // OK now handle ranges: + // + first = char_set.ranges_begin(); + last = char_set.ranges_end(); + while(first != last) + { + // first grab the endpoints of the range: + charT c1 = this->m_traits.translate(first->first, this->m_icase); + ++first; + charT c2 = this->m_traits.translate(first->first, this->m_icase); + ++first; + // different actions now depending upon whether collation is turned on: + if(flags() & regex_constants::collate) + { + // we need to transform our range into sort keys: + charT c3[2] = { c1, charT(0), }; + string_type s1 = this->m_traits.transform(c3, c3+1); + c3[0] = c2; + string_type s2 = this->m_traits.transform(c3, c3+1); + if(s1 > s2) + { + // Oops error: + return 0; + } + BOOST_ASSERT(c3[1] == charT(0)); + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + c3[0] = static_cast(i); + string_type s3 = this->m_traits.transform(c3, c3 +1); + if((s1 <= s3) && (s3 <= s2)) + result->_map[i] = true; + } + } + else + { + if(char_less(c2, c1)) + { + // Oops error: + return 0; + } + // everything in range matches: + std::memset(result->_map + static_cast(c1), true, 1 + static_cast(c2) - static_cast(c1)); + } + } + // + // and now the classes: + // + typedef typename traits::char_class_type mask_type; + mask_type m = char_set.classes(); + if(flags() & regbase::icase) + { + // adjust m as needed: + if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask)) + m |= m_alpha_mask; + } + if(m != 0) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + if(this->m_traits.isctype(static_cast(i), m)) + result->_map[i] = true; + } + } + // + // and now the negated classes: + // + m = char_set.negated_classes(); + if(flags() & regbase::icase) + { + // adjust m as needed: + if(((m & m_lower_mask) == m_lower_mask) || ((m & m_upper_mask) == m_upper_mask)) + m |= m_alpha_mask; + } + if(m != 0) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + if(0 == this->m_traits.isctype(static_cast(i), m)) + result->_map[i] = true; + } + } + // + // now process the equivalence classes: + // + first = char_set.equivalents_begin(); + last = char_set.equivalents_end(); + while(first != last) + { + string_type s; + BOOST_ASSERT(static_cast(0) == first->second); + s = m_traits.transform_primary(&first->first, &first->first+1); + if(s.empty()) + return 0; // invalid or unsupported equivalence class + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + charT c[2] = { (static_cast(i)), charT(0), }; + string_type s2 = this->m_traits.transform_primary(c, c+1); + if(s == s2) + result->_map[i] = true; + } + ++first; + } + if(negate) + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + { + result->_map[i] = !(result->_map[i]); + } + } + return result; +} + +template +void basic_regex_creator::finalize(const charT* p1, const charT* p2) +{ + if(this->m_pdata->m_status) + return; + // we've added all the states we need, now finish things off. + // start by adding a terminating state: + append_state(syntax_element_match); + // extend storage to store original expression: + std::ptrdiff_t len = p2 - p1; + m_pdata->m_expression_len = len; + charT* ps = static_cast(m_pdata->m_data.extend(sizeof(charT) * (1 + (p2 - p1)))); + m_pdata->m_expression = ps; + re_detail::copy(p1, p2, ps); + ps[p2 - p1] = 0; + // fill in our other data... + // successful parsing implies a zero status: + m_pdata->m_status = 0; + // get the first state of the machine: + m_pdata->m_first_state = static_cast(m_pdata->m_data.data()); + // fixup pointers in the machine: + fixup_pointers(m_pdata->m_first_state); + if(m_has_recursions) + { + m_pdata->m_has_recursions = true; + fixup_recursions(m_pdata->m_first_state); + if(this->m_pdata->m_status) + return; + } + else + m_pdata->m_has_recursions = false; + // create nested startmaps: + create_startmaps(m_pdata->m_first_state); + // create main startmap: + std::memset(m_pdata->m_startmap, 0, sizeof(m_pdata->m_startmap)); + m_pdata->m_can_be_null = 0; + + m_bad_repeats = 0; + create_startmap(m_pdata->m_first_state, m_pdata->m_startmap, &(m_pdata->m_can_be_null), mask_all); + // get the restart type: + m_pdata->m_restart_type = get_restart_type(m_pdata->m_first_state); + // optimise a leading repeat if there is one: + probe_leading_repeat(m_pdata->m_first_state); +} + +template +void basic_regex_creator::fixup_pointers(re_syntax_base* state) +{ + while(state) + { + switch(state->type) + { + case syntax_element_recurse: + m_has_recursions = true; + if(state->next.i) + state->next.p = getaddress(state->next.i, state); + else + state->next.p = 0; + break; + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + // set the state_id of this repeat: + static_cast(state)->state_id = m_repeater_id++; + // fall through: + case syntax_element_alt: + std::memset(static_cast(state)->_map, 0, sizeof(static_cast(state)->_map)); + static_cast(state)->can_be_null = 0; + // fall through: + case syntax_element_jump: + static_cast(state)->alt.p = getaddress(static_cast(state)->alt.i, state); + // fall through again: + default: + if(state->next.i) + state->next.p = getaddress(state->next.i, state); + else + state->next.p = 0; + } + state = state->next.p; + } +} + +template +void basic_regex_creator::fixup_recursions(re_syntax_base* state) +{ + re_syntax_base* base = state; + while(state) + { + switch(state->type) + { + case syntax_element_assert_backref: + { + // just check that the index is valid: + int idx = static_cast(state)->index; + if(idx < 0) + { + idx = -idx-1; + if(idx >= 10000) + { + idx = m_pdata->get_id(idx); + if(idx <= 0) + { + // check of sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = cutl_details_boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered a forward reference to a marked sub-expression that does not exist."; + cutl_details_boost::regex_error e(message, cutl_details_boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + } + } + break; + case syntax_element_recurse: + { + bool ok = false; + re_syntax_base* p = base; + std::ptrdiff_t idx = static_cast(state)->alt.i; + if(idx > 10000) + { + // + // There may be more than one capture group with this hash, just do what Perl + // does and recurse to the leftmost: + // + idx = m_pdata->get_id(static_cast(idx)); + } + while(p) + { + if((p->type == syntax_element_startmark) && (static_cast(p)->index == idx)) + { + // + // We've found the target of the recursion, set the jump target: + // + static_cast(state)->alt.p = p; + ok = true; + // + // Now scan the target for nested repeats: + // + p = p->next.p; + int next_rep_id = 0; + while(p) + { + switch(p->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + next_rep_id = static_cast(p)->state_id; + break; + case syntax_element_endmark: + if(static_cast(p)->index == idx) + next_rep_id = -1; + break; + default: + break; + } + if(next_rep_id) + break; + p = p->next.p; + } + if(next_rep_id > 0) + { + static_cast(state)->state_id = next_rep_id - 1; + } + + break; + } + p = p->next.p; + } + if(!ok) + { + // recursion to sub-expression that doesn't exist: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = cutl_details_boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered a forward reference to a recursive sub-expression that does not exist."; + cutl_details_boost::regex_error e(message, cutl_details_boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + } + default: + break; + } + state = state->next.p; + } +} + +template +void basic_regex_creator::create_startmaps(re_syntax_base* state) +{ + // non-recursive implementation: + // create the last map in the machine first, so that earlier maps + // can make use of the result... + // + // This was originally a recursive implementation, but that caused stack + // overflows with complex expressions on small stacks (think COM+). + + // start by saving the case setting: + bool l_icase = m_icase; + std::vector > v; + + while(state) + { + switch(state->type) + { + case syntax_element_toggle_case: + // we need to track case changes here: + m_icase = static_cast(state)->icase; + state = state->next.p; + continue; + case syntax_element_alt: + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + // just push the state onto our stack for now: + v.push_back(std::pair(m_icase, state)); + state = state->next.p; + break; + case syntax_element_backstep: + // we need to calculate how big the backstep is: + static_cast(state)->index + = this->calculate_backstep(state->next.p); + if(static_cast(state)->index < 0) + { + // Oops error: + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = cutl_details_boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Invalid lookbehind assertion encountered in the regular expression."; + cutl_details_boost::regex_error e(message, cutl_details_boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + // fall through: + default: + state = state->next.p; + } + } + // now work through our list, building all the maps as we go: + while(v.size()) + { + const std::pair& p = v.back(); + m_icase = p.first; + state = p.second; + v.pop_back(); + + // Build maps: + m_bad_repeats = 0; + create_startmap(state->next.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_take); + m_bad_repeats = 0; + create_startmap(static_cast(state)->alt.p, static_cast(state)->_map, &static_cast(state)->can_be_null, mask_skip); + // adjust the type of the state to allow for faster matching: + state->type = this->get_repeat_type(state); + } + // restore case sensitivity: + m_icase = l_icase; +} + +template +int basic_regex_creator::calculate_backstep(re_syntax_base* state) +{ + typedef typename traits::char_class_type mask_type; + int result = 0; + while(state) + { + switch(state->type) + { + case syntax_element_startmark: + if((static_cast(state)->index == -1) + || (static_cast(state)->index == -2)) + { + state = static_cast(state->next.p)->alt.p->next.p; + continue; + } + else if(static_cast(state)->index == -3) + { + state = state->next.p->next.p; + continue; + } + break; + case syntax_element_endmark: + if((static_cast(state)->index == -1) + || (static_cast(state)->index == -2)) + return result; + break; + case syntax_element_literal: + result += static_cast(state)->length; + break; + case syntax_element_wild: + case syntax_element_set: + result += 1; + break; + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_backref: + case syntax_element_rep: + case syntax_element_combining: + case syntax_element_long_set_rep: + case syntax_element_backstep: + { + re_repeat* rep = static_cast(state); + // adjust the type of the state to allow for faster matching: + state->type = this->get_repeat_type(state); + if((state->type == syntax_element_dot_rep) + || (state->type == syntax_element_char_rep) + || (state->type == syntax_element_short_set_rep)) + { + if(rep->max != rep->min) + return -1; + result += static_cast(rep->min); + state = rep->alt.p; + continue; + } + else if((state->type == syntax_element_long_set_rep)) + { + BOOST_ASSERT(rep->next.p->type == syntax_element_long_set); + if(static_cast*>(rep->next.p)->singleton == 0) + return -1; + if(rep->max != rep->min) + return -1; + result += static_cast(rep->min); + state = rep->alt.p; + continue; + } + } + return -1; + case syntax_element_long_set: + if(static_cast*>(state)->singleton == 0) + return -1; + result += 1; + break; + case syntax_element_jump: + state = static_cast(state)->alt.p; + continue; + case syntax_element_alt: + { + int r1 = calculate_backstep(state->next.p); + int r2 = calculate_backstep(static_cast(state)->alt.p); + if((r1 < 0) || (r1 != r2)) + return -1; + return result + r1; + } + default: + break; + } + state = state->next.p; + } + return -1; +} + +template +void basic_regex_creator::create_startmap(re_syntax_base* state, unsigned char* l_map, unsigned int* pnull, unsigned char mask) +{ + int not_last_jump = 1; + re_syntax_base* recursion_start = 0; + int recursion_sub = 0; + re_syntax_base* recursion_restart = 0; + + // track case sensitivity: + bool l_icase = m_icase; + + while(state) + { + switch(state->type) + { + case syntax_element_toggle_case: + l_icase = static_cast(state)->icase; + state = state->next.p; + break; + case syntax_element_literal: + { + // don't set anything in *pnull, set each element in l_map + // that could match the first character in the literal: + if(l_map) + { + l_map[0] |= mask_init; + charT first_char = *static_cast(static_cast(static_cast(state) + 1)); + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(m_traits.translate(static_cast(i), l_icase) == first_char) + l_map[i] |= mask; + } + } + return; + } + case syntax_element_end_line: + { + // next character must be a line separator (if there is one): + if(l_map) + { + l_map[0] |= mask_init; + l_map['\n'] |= mask; + l_map['\r'] |= mask; + l_map['\f'] |= mask; + l_map[0x85] |= mask; + } + // now figure out if we can match a NULL string at this point: + if(pnull) + create_startmap(state->next.p, 0, pnull, mask); + return; + } + case syntax_element_recurse: + { + if(recursion_start == state) + { + // Infinite recursion!! + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = cutl_details_boost::regex_constants::error_bad_pattern; + // + // clear the expression, we should be empty: + // + this->m_pdata->m_expression = 0; + this->m_pdata->m_expression_len = 0; + // + // and throw if required: + // + if(0 == (this->flags() & regex_constants::no_except)) + { + std::string message = "Encountered an infinite recursion."; + cutl_details_boost::regex_error e(message, cutl_details_boost::regex_constants::error_bad_pattern, 0); + e.raise(); + } + } + else if(recursion_start == 0) + { + recursion_start = state; + recursion_restart = state->next.p; + state = static_cast(state)->alt.p; + if(state->type == syntax_element_startmark) + recursion_sub = static_cast(state)->index; + else + recursion_sub = 0; + break; + } + // fall through, can't handle nested recursion here... + } + case syntax_element_backref: + // can be null, and any character can match: + if(pnull) + *pnull |= mask; + // fall through: + case syntax_element_wild: + { + // can't be null, any character can match: + set_all_masks(l_map, mask); + return; + } + case syntax_element_match: + { + // must be null, any character can match: + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + case syntax_element_word_start: + { + // recurse, then AND with all the word characters: + create_startmap(state->next.p, l_map, pnull, mask); + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(!m_traits.isctype(static_cast(i), m_word_mask)) + l_map[i] &= static_cast(~mask); + } + } + return; + } + case syntax_element_word_end: + { + // recurse, then AND with all the word characters: + create_startmap(state->next.p, l_map, pnull, mask); + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(m_traits.isctype(static_cast(i), m_word_mask)) + l_map[i] &= static_cast(~mask); + } + } + return; + } + case syntax_element_buffer_end: + { + // we *must be null* : + if(pnull) + *pnull |= mask; + return; + } + case syntax_element_long_set: + if(l_map) + { + typedef typename traits::char_class_type mask_type; + if(static_cast*>(state)->singleton) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + charT c = static_cast(i); + if(&c != re_is_set_member(&c, &c + 1, static_cast*>(state), *m_pdata, m_icase)) + l_map[i] |= mask; + } + } + else + set_all_masks(l_map, mask); + } + return; + case syntax_element_set: + if(l_map) + { + l_map[0] |= mask_init; + for(unsigned int i = 0; i < (1u << CHAR_BIT); ++i) + { + if(static_cast(state)->_map[ + static_cast(m_traits.translate(static_cast(i), l_icase))]) + l_map[i] |= mask; + } + } + return; + case syntax_element_jump: + // take the jump: + state = static_cast(state)->alt.p; + not_last_jump = -1; + break; + case syntax_element_alt: + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + re_alt* rep = static_cast(state); + if(rep->_map[0] & mask_init) + { + if(l_map) + { + // copy previous results: + l_map[0] |= mask_init; + for(unsigned int i = 0; i <= UCHAR_MAX; ++i) + { + if(rep->_map[i] & mask_any) + l_map[i] |= mask; + } + } + if(pnull) + { + if(rep->can_be_null & mask_any) + *pnull |= mask; + } + } + else + { + // we haven't created a startmap for this alternative yet + // so take the union of the two options: + if(is_bad_repeat(state)) + { + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + set_bad_repeat(state); + create_startmap(state->next.p, l_map, pnull, mask); + if((state->type == syntax_element_alt) + || (static_cast(state)->min == 0) + || (not_last_jump == 0)) + create_startmap(rep->alt.p, l_map, pnull, mask); + } + } + return; + case syntax_element_soft_buffer_end: + // match newline or null: + if(l_map) + { + l_map[0] |= mask_init; + l_map['\n'] |= mask; + l_map['\r'] |= mask; + } + if(pnull) + *pnull |= mask; + return; + case syntax_element_endmark: + // need to handle independent subs as a special case: + if(static_cast(state)->index < 0) + { + // can be null, any character can match: + set_all_masks(l_map, mask); + if(pnull) + *pnull |= mask; + return; + } + else if(recursion_start && (recursion_sub != 0) && (recursion_sub == static_cast(state)->index)) + { + // recursion termination: + recursion_start = 0; + state = recursion_restart; + break; + } + + // + // Normally we just go to the next state... but if this sub-expression is + // the target of a recursion, then we might be ending a recursion, in which + // case we should check whatever follows that recursion, as well as whatever + // follows this state: + // + if(m_pdata->m_has_recursions && static_cast(state)->index) + { + bool ok = false; + re_syntax_base* p = m_pdata->m_first_state; + while(p) + { + if((p->type == syntax_element_recurse)) + { + re_brace* p2 = static_cast(static_cast(p)->alt.p); + if((p2->type == syntax_element_startmark) && (p2->index == static_cast(state)->index)) + { + ok = true; + break; + } + } + p = p->next.p; + } + if(ok) + { + create_startmap(p->next.p, l_map, pnull, mask); + } + } + state = state->next.p; + break; + + case syntax_element_startmark: + // need to handle independent subs as a special case: + if(static_cast(state)->index == -3) + { + state = state->next.p->next.p; + break; + } + // otherwise fall through: + default: + state = state->next.p; + } + ++not_last_jump; + } +} + +template +unsigned basic_regex_creator::get_restart_type(re_syntax_base* state) +{ + // + // find out how the machine starts, so we can optimise the search: + // + while(state) + { + switch(state->type) + { + case syntax_element_startmark: + case syntax_element_endmark: + state = state->next.p; + continue; + case syntax_element_start_line: + return regbase::restart_line; + case syntax_element_word_start: + return regbase::restart_word; + case syntax_element_buffer_start: + return regbase::restart_buf; + case syntax_element_restart_continue: + return regbase::restart_continue; + default: + state = 0; + continue; + } + } + return regbase::restart_any; +} + +template +void basic_regex_creator::set_all_masks(unsigned char* bits, unsigned char mask) +{ + // + // set mask in all of bits elements, + // if bits[0] has mask_init not set then we can + // optimise this to a call to memset: + // + if(bits) + { + if(bits[0] == 0) + (std::memset)(bits, mask, 1u << CHAR_BIT); + else + { + for(unsigned i = 0; i < (1u << CHAR_BIT); ++i) + bits[i] |= mask; + } + bits[0] |= mask_init; + } +} + +template +bool basic_regex_creator::is_bad_repeat(re_syntax_base* pt) +{ + switch(pt->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + unsigned state_id = static_cast(pt)->state_id; + if(state_id > sizeof(m_bad_repeats) * CHAR_BIT) + return true; // run out of bits, assume we can't traverse this one. + static const cutl_details_boost::uintmax_t one = 1uL; + return m_bad_repeats & (one << state_id); + } + default: + return false; + } +} + +template +void basic_regex_creator::set_bad_repeat(re_syntax_base* pt) +{ + switch(pt->type) + { + case syntax_element_rep: + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + { + unsigned state_id = static_cast(pt)->state_id; + static const cutl_details_boost::uintmax_t one = 1uL; + if(state_id <= sizeof(m_bad_repeats) * CHAR_BIT) + m_bad_repeats |= (one << state_id); + } + default: + break; + } +} + +template +syntax_element_type basic_regex_creator::get_repeat_type(re_syntax_base* state) +{ + typedef typename traits::char_class_type mask_type; + if(state->type == syntax_element_rep) + { + // check to see if we are repeating a single state: + if(state->next.p->next.p->next.p == static_cast(state)->alt.p) + { + switch(state->next.p->type) + { + case re_detail::syntax_element_wild: + return re_detail::syntax_element_dot_rep; + case re_detail::syntax_element_literal: + return re_detail::syntax_element_char_rep; + case re_detail::syntax_element_set: + return re_detail::syntax_element_short_set_rep; + case re_detail::syntax_element_long_set: + if(static_cast*>(state->next.p)->singleton) + return re_detail::syntax_element_long_set_rep; + break; + default: + break; + } + } + } + return state->type; +} + +template +void basic_regex_creator::probe_leading_repeat(re_syntax_base* state) +{ + // enumerate our states, and see if we have a leading repeat + // for which failed search restarts can be optimised; + do + { + switch(state->type) + { + case syntax_element_startmark: + if(static_cast(state)->index >= 0) + { + state = state->next.p; + continue; + } + if((static_cast(state)->index == -1) + || (static_cast(state)->index == -2)) + { + // skip past the zero width assertion: + state = static_cast(state->next.p)->alt.p->next.p; + continue; + } + if(static_cast(state)->index == -3) + { + // Have to skip the leading jump state: + state = state->next.p->next.p; + continue; + } + return; + case syntax_element_endmark: + case syntax_element_start_line: + case syntax_element_end_line: + case syntax_element_word_boundary: + case syntax_element_within_word: + case syntax_element_word_start: + case syntax_element_word_end: + case syntax_element_buffer_start: + case syntax_element_buffer_end: + case syntax_element_restart_continue: + state = state->next.p; + break; + case syntax_element_dot_rep: + case syntax_element_char_rep: + case syntax_element_short_set_rep: + case syntax_element_long_set_rep: + if(this->m_has_backrefs == 0) + static_cast(state)->leading = true; + // fall through: + default: + return; + } + }while(state); +} + + +} // namespace re_detail + +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/cutl/details/boost/regex/v4/basic_regex_parser.hpp b/cutl/details/boost/regex/v4/basic_regex_parser.hpp new file mode 100644 index 0000000..63475e8 --- /dev/null +++ b/cutl/details/boost/regex/v4/basic_regex_parser.hpp @@ -0,0 +1,2817 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE basic_regex_parser.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex_parser. + */ + +#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP +#define BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4244 4800) +#endif + +template +class basic_regex_parser : public basic_regex_creator +{ +public: + basic_regex_parser(regex_data* data); + void parse(const charT* p1, const charT* p2, unsigned flags); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos); + void fail(regex_constants::error_type error_code, std::ptrdiff_t position, const std::string& message) + { + fail(error_code, position, message, position); + } + + bool parse_all(); + bool parse_basic(); + bool parse_extended(); + bool parse_literal(); + bool parse_open_paren(); + bool parse_basic_escape(); + bool parse_extended_escape(); + bool parse_match_any(); + bool parse_repeat(std::size_t low = 0, std::size_t high = (std::numeric_limits::max)()); + bool parse_repeat_range(bool isbasic); + bool parse_alt(); + bool parse_set(); + bool parse_backref(); + void parse_set_literal(basic_char_set& char_set); + bool parse_inner_set(basic_char_set& char_set); + bool parse_QE(); + bool parse_perl_extension(); + bool add_emacs_code(bool negate); + bool unwind_alts(std::ptrdiff_t last_paren_start); + digraph get_next_set_literal(basic_char_set& char_set); + charT unescape_character(); + regex_constants::syntax_option_type parse_options(); + +private: + typedef bool (basic_regex_parser::*parser_proc_type)(); + typedef typename traits::string_type string_type; + typedef typename traits::char_class_type char_class_type; + parser_proc_type m_parser_proc; // the main parser to use + const charT* m_base; // the start of the string being parsed + const charT* m_end; // the end of the string being parsed + const charT* m_position; // our current parser position + unsigned m_mark_count; // how many sub-expressions we have + int m_mark_reset; // used to indicate that we're inside a (?|...) block. + unsigned m_max_mark; // largest mark count seen inside a (?|...) block. + std::ptrdiff_t m_paren_start; // where the last seen ')' began (where repeats are inserted). + std::ptrdiff_t m_alt_insert_point; // where to insert the next alternative + bool m_has_case_change; // true if somewhere in the current block the case has changed +#if defined(BOOST_MSVC) && defined(_M_IX86) + // This is an ugly warning suppression workaround (for warnings *inside* std::vector + // that can not otherwise be suppressed)... + BOOST_STATIC_ASSERT(sizeof(long) >= sizeof(void*)); + std::vector m_alt_jumps; // list of alternative in the current scope. +#else + std::vector m_alt_jumps; // list of alternative in the current scope. +#endif + + basic_regex_parser& operator=(const basic_regex_parser&); + basic_regex_parser(const basic_regex_parser&); +}; + +template +basic_regex_parser::basic_regex_parser(regex_data* data) + : basic_regex_creator(data), m_mark_count(0), m_mark_reset(-1), m_max_mark(0), m_paren_start(0), m_alt_insert_point(0), m_has_case_change(false) +{ +} + +template +void basic_regex_parser::parse(const charT* p1, const charT* p2, unsigned l_flags) +{ + // pass l_flags on to base class: + this->init(l_flags); + // set up pointers: + m_position = m_base = p1; + m_end = p2; + // empty strings are errors: + if((p1 == p2) && + ( + ((l_flags & regbase::main_option_type) != regbase::perl_syntax_group) + || (l_flags & regbase::no_empty_expressions) + ) + ) + { + fail(regex_constants::error_empty, 0); + return; + } + // select which parser to use: + switch(l_flags & regbase::main_option_type) + { + case regbase::perl_syntax_group: + { + m_parser_proc = &basic_regex_parser::parse_extended; + // + // Add a leading paren with index zero to give recursions a target: + // + re_brace* br = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + br->index = 0; + br->icase = this->flags() & regbase::icase; + break; + } + case regbase::basic_syntax_group: + m_parser_proc = &basic_regex_parser::parse_basic; + break; + case regbase::literal: + m_parser_proc = &basic_regex_parser::parse_literal; + break; + default: + // Ooops, someone has managed to set more than one of the main option flags, + // so this must be an error: + fail(regex_constants::error_unknown, 0, "An invalid combination of regular expression syntax flags was used."); + return; + } + + // parse all our characters: + bool result = parse_all(); + // + // Unwind our alternatives: + // + unwind_alts(-1); + // reset l_flags as a global scope (?imsx) may have altered them: + this->flags(l_flags); + // if we haven't gobbled up all the characters then we must + // have had an unexpected ')' : + if(!result) + { + fail(regex_constants::error_paren, ::cutl_details_boost::re_detail::distance(m_base, m_position), "Found a closing ) with no corresponding openening parenthesis."); + return; + } + // if an error has been set then give up now: + if(this->m_pdata->m_status) + return; + // fill in our sub-expression count: + this->m_pdata->m_mark_count = 1 + m_mark_count; + this->finalize(p1, p2); +} + +template +void basic_regex_parser::fail(regex_constants::error_type error_code, std::ptrdiff_t position) +{ + // get the error message: + std::string message = this->m_pdata->m_ptraits->error_string(error_code); + fail(error_code, position, message); +} + +template +void basic_regex_parser::fail(regex_constants::error_type error_code, std::ptrdiff_t position, std::string message, std::ptrdiff_t start_pos) +{ + if(0 == this->m_pdata->m_status) // update the error code if not already set + this->m_pdata->m_status = error_code; + m_position = m_end; // don't bother parsing anything else + + // + // Augment error message with the regular expression text: + // + if(start_pos == position) + start_pos = (std::max)(static_cast(0), position - static_cast(10)); + std::ptrdiff_t end_pos = (std::min)(position + static_cast(10), static_cast(m_end - m_base)); + if(error_code != regex_constants::error_empty) + { + if((start_pos != 0) || (end_pos != (m_end - m_base))) + message += " The error occured while parsing the regular expression fragment: '"; + else + message += " The error occured while parsing the regular expression: '"; + if(start_pos != end_pos) + { + message += std::string(m_base + start_pos, m_base + position); + message += ">>>HERE>>>"; + message += std::string(m_base + position, m_base + end_pos); + } + message += "'."; + } + +#ifndef BOOST_NO_EXCEPTIONS + if(0 == (this->flags() & regex_constants::no_except)) + { + cutl_details_boost::regex_error e(message, error_code, position); + e.raise(); + } +#else + (void)position; // suppress warnings. +#endif +} + +template +bool basic_regex_parser::parse_all() +{ + bool result = true; + while(result && (m_position != m_end)) + { + result = (this->*m_parser_proc)(); + } + return result; +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4702) +#endif +template +bool basic_regex_parser::parse_basic() +{ + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_escape: + return parse_basic_escape(); + case regex_constants::syntax_dot: + return parse_match_any(); + case regex_constants::syntax_caret: + ++m_position; + this->append_state(syntax_element_start_line); + break; + case regex_constants::syntax_dollar: + ++m_position; + this->append_state(syntax_element_end_line); + break; + case regex_constants::syntax_star: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(); + } + case regex_constants::syntax_plus: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(1); + } + case regex_constants::syntax_question: + if(!(this->m_last_state) || (this->m_last_state->type == syntax_element_start_line) || !(this->flags() & regbase::emacs_ex)) + return parse_literal(); + else + { + ++m_position; + return parse_repeat(0, 1); + } + case regex_constants::syntax_open_set: + return parse_set(); + case regex_constants::syntax_newline: + if(this->flags() & regbase::newline_alt) + return parse_alt(); + else + return parse_literal(); + default: + return parse_literal(); + } + return true; +} + +template +bool basic_regex_parser::parse_extended() +{ + bool result = true; + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_open_mark: + return parse_open_paren(); + case regex_constants::syntax_close_mark: + return false; + case regex_constants::syntax_escape: + return parse_extended_escape(); + case regex_constants::syntax_dot: + return parse_match_any(); + case regex_constants::syntax_caret: + ++m_position; + this->append_state( + (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_start : syntax_element_start_line)); + break; + case regex_constants::syntax_dollar: + ++m_position; + this->append_state( + (this->flags() & regex_constants::no_mod_m ? syntax_element_buffer_end : syntax_element_end_line)); + break; + case regex_constants::syntax_star: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"*\" cannot start a regular expression."); + return false; + } + ++m_position; + return parse_repeat(); + case regex_constants::syntax_question: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"?\" cannot start a regular expression."); + return false; + } + ++m_position; + return parse_repeat(0,1); + case regex_constants::syntax_plus: + if(m_position == this->m_base) + { + fail(regex_constants::error_badrepeat, 0, "The repeat operator \"+\" cannot start a regular expression."); + return false; + } + ++m_position; + return parse_repeat(1); + case regex_constants::syntax_open_brace: + ++m_position; + return parse_repeat_range(false); + case regex_constants::syntax_close_brace: + fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {."); + return false; + case regex_constants::syntax_or: + return parse_alt(); + case regex_constants::syntax_open_set: + return parse_set(); + case regex_constants::syntax_newline: + if(this->flags() & regbase::newline_alt) + return parse_alt(); + else + return parse_literal(); + case regex_constants::syntax_hash: + // + // If we have a mod_x flag set, then skip until + // we get to a newline character: + // + if((this->flags() + & (regbase::no_perl_ex|regbase::mod_x)) + == regbase::mod_x) + { + while((m_position != m_end) && !is_separator(*m_position++)){} + return true; + } + // Otherwise fall through: + default: + result = parse_literal(); + break; + } + return result; +} +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +template +bool basic_regex_parser::parse_literal() +{ + // append this as a literal provided it's not a space character + // or the perl option regbase::mod_x is not set: + if( + ((this->flags() + & (regbase::main_option_type|regbase::mod_x|regbase::no_perl_ex)) + != regbase::mod_x) + || !this->m_traits.isctype(*m_position, this->m_mask_space)) + this->append_literal(*m_position); + ++m_position; + return true; +} + +template +bool basic_regex_parser::parse_open_paren() +{ + // + // skip the '(' and error check: + // + if(++m_position == m_end) + { + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + // + // begin by checking for a perl-style (?...) extension: + // + if( + ((this->flags() & (regbase::main_option_type | regbase::no_perl_ex)) == 0) + || ((this->flags() & (regbase::main_option_type | regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) + ) + { + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) + return parse_perl_extension(); + } + // + // update our mark count, and append the required state: + // + unsigned markid = 0; + if(0 == (this->flags() & regbase::nosubs)) + { + markid = ++m_mark_count; +#ifndef BOOST_NO_STD_DISTANCE + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair(std::distance(m_base, m_position) - 1, 0)); +#else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair((m_position - m_base) - 1, 0)); +#endif + } + re_brace* pb = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + std::ptrdiff_t last_paren_start = this->getoffset(pb); + // back up insertion point for alternations, and set new point: + std::ptrdiff_t last_alt_point = m_alt_insert_point; + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + // + // back up the current flags in case we have a nested (?imsx) group: + // + regex_constants::syntax_option_type opts = this->flags(); + bool old_case_change = m_has_case_change; + m_has_case_change = false; // no changes to this scope as yet... + // + // Back up branch reset data in case we have a nested (?|...) + // + int mark_reset = m_mark_reset; + m_mark_reset = -1; + // + // now recursively add more states, this will terminate when we get to a + // matching ')' : + // + parse_all(); + // + // Unwind pushed alternatives: + // + if(0 == unwind_alts(last_paren_start)) + return false; + // + // restore flags: + // + if(m_has_case_change) + { + // the case has changed in one or more of the alternatives + // within the scoped (...) block: we have to add a state + // to reset the case sensitivity: + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = opts & regbase::icase; + } + this->flags(opts); + m_has_case_change = old_case_change; + // + // restore branch reset: + // + m_mark_reset = mark_reset; + // + // we either have a ')' or we have run out of characters prematurely: + // + if(m_position == m_end) + { + this->fail(regex_constants::error_paren, ::cutl_details_boost::re_detail::distance(m_base, m_end)); + return false; + } + BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); +#ifndef BOOST_NO_STD_DISTANCE + if(markid && (this->flags() & regbase::save_subexpression_location)) + this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position); +#else + if(markid && (this->flags() & regbase::save_subexpression_location)) + this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base); +#endif + ++m_position; + // + // append closing parenthesis state: + // + pb = static_cast(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + this->m_paren_start = last_paren_start; + // + // restore the alternate insertion point: + // + this->m_alt_insert_point = last_alt_point; + // + // allow backrefs to this mark: + // + if((markid > 0) && (markid < sizeof(unsigned) * CHAR_BIT)) + this->m_backrefs |= 1u << (markid - 1); + + return true; +} + +template +bool basic_regex_parser::parse_basic_escape() +{ + ++m_position; + bool result = true; + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::syntax_open_mark: + return parse_open_paren(); + case regex_constants::syntax_close_mark: + return false; + case regex_constants::syntax_plus: + if(this->flags() & regex_constants::bk_plus_qm) + { + ++m_position; + return parse_repeat(1); + } + else + return parse_literal(); + case regex_constants::syntax_question: + if(this->flags() & regex_constants::bk_plus_qm) + { + ++m_position; + return parse_repeat(0, 1); + } + else + return parse_literal(); + case regex_constants::syntax_open_brace: + if(this->flags() & regbase::no_intervals) + return parse_literal(); + ++m_position; + return parse_repeat_range(true); + case regex_constants::syntax_close_brace: + if(this->flags() & regbase::no_intervals) + return parse_literal(); + fail(regex_constants::error_brace, this->m_position - this->m_base, "Found a closing repetition operator } with no corresponding {."); + return false; + case regex_constants::syntax_or: + if(this->flags() & regbase::bk_vbar) + return parse_alt(); + else + result = parse_literal(); + break; + case regex_constants::syntax_digit: + return parse_backref(); + case regex_constants::escape_type_start_buffer: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_buffer_start); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_end_buffer: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_buffer_end); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_word_assert: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_boundary); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_not_word_assert: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_within_word); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_left_word: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_start); + } + else + result = parse_literal(); + break; + case regex_constants::escape_type_right_word: + if(this->flags() & regbase::emacs_ex) + { + ++m_position; + this->append_state(syntax_element_word_end); + } + else + result = parse_literal(); + break; + default: + if(this->flags() & regbase::emacs_ex) + { + bool negate = true; + switch(*m_position) + { + case 'w': + negate = false; + // fall through: + case 'W': + { + basic_char_set char_set; + if(negate) + char_set.negate(); + char_set.add_class(this->m_word_mask); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; + } + case 's': + negate = false; + // fall through: + case 'S': + return add_emacs_code(negate); + case 'c': + case 'C': + // not supported yet: + fail(regex_constants::error_escape, m_position - m_base, "The \\c and \\C escape sequences are not supported by POSIX basic regular expressions: try the Perl syntax instead."); + return false; + default: + break; + } + } + result = parse_literal(); + break; + } + return result; +} + +template +bool basic_regex_parser::parse_extended_escape() +{ + ++m_position; + bool negate = false; // in case this is a character class escape: \w \d etc + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::escape_type_not_class: + negate = true; + // fall through: + case regex_constants::escape_type_class: + { +escape_type_class_jump: + typedef typename traits::char_class_type mask_type; + mask_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + basic_char_set char_set; + if(negate) + char_set.negate(); + char_set.add_class(m); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; + } + // + // not a class, just a regular unknown escape: + // + this->append_literal(unescape_character()); + break; + } + case regex_constants::syntax_digit: + return parse_backref(); + case regex_constants::escape_type_left_word: + ++m_position; + this->append_state(syntax_element_word_start); + break; + case regex_constants::escape_type_right_word: + ++m_position; + this->append_state(syntax_element_word_end); + break; + case regex_constants::escape_type_start_buffer: + ++m_position; + this->append_state(syntax_element_buffer_start); + break; + case regex_constants::escape_type_end_buffer: + ++m_position; + this->append_state(syntax_element_buffer_end); + break; + case regex_constants::escape_type_word_assert: + ++m_position; + this->append_state(syntax_element_word_boundary); + break; + case regex_constants::escape_type_not_word_assert: + ++m_position; + this->append_state(syntax_element_within_word); + break; + case regex_constants::escape_type_Z: + ++m_position; + this->append_state(syntax_element_soft_buffer_end); + break; + case regex_constants::escape_type_Q: + return parse_QE(); + case regex_constants::escape_type_C: + return parse_match_any(); + case regex_constants::escape_type_X: + ++m_position; + this->append_state(syntax_element_combining); + break; + case regex_constants::escape_type_G: + ++m_position; + this->append_state(syntax_element_restart_continue); + break; + case regex_constants::escape_type_not_property: + negate = true; + // fall through: + case regex_constants::escape_type_property: + { + ++m_position; + char_class_type m; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Incomplete property escape found."); + return false; + } + // maybe have \p{ddd} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + const charT* base = m_position; + // skip forward until we find enclosing brace: + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Closing } missing from property escape sequence."); + return false; + } + m = this->m_traits.lookup_classname(++base, m_position++); + } + else + { + m = this->m_traits.lookup_classname(m_position, m_position+1); + ++m_position; + } + if(m != 0) + { + basic_char_set char_set; + if(negate) + char_set.negate(); + char_set.add_class(m); + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + return true; + } + fail(regex_constants::error_ctype, m_position - m_base, "Escape sequence was neither a valid property nor a valid character class name."); + return false; + } + case regex_constants::escape_type_reset_start_mark: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + re_brace* pb = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->index = -5; + pb->icase = this->flags() & regbase::icase; + this->m_pdata->m_data.align(); + ++m_position; + return true; + } + goto escape_type_class_jump; + case regex_constants::escape_type_line_ending: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + const charT* e = get_escape_R_string(); + const charT* old_position = m_position; + const charT* old_end = m_end; + const charT* old_base = m_base; + m_position = e; + m_base = e; + m_end = e + traits::length(e); + bool r = parse_all(); + m_position = ++old_position; + m_end = old_end; + m_base = old_base; + return r; + } + goto escape_type_class_jump; + case regex_constants::escape_type_extended_backref: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + { + bool have_brace = false; + bool negative = false; + static const char* incomplete_message = "Incomplete \\g escape found."; + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + // maybe have \g{ddd} + regex_constants::syntax_type syn = this->m_traits.syntax_type(*m_position); + regex_constants::syntax_type syn_end = 0; + if((syn == regex_constants::syntax_open_brace) + || (syn == regex_constants::escape_type_left_word) + || (syn == regex_constants::escape_type_end_buffer)) + { + if(++m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + have_brace = true; + switch(syn) + { + case regex_constants::syntax_open_brace: + syn_end = regex_constants::syntax_close_brace; + break; + case regex_constants::escape_type_left_word: + syn_end = regex_constants::escape_type_right_word; + break; + default: + syn_end = regex_constants::escape_type_end_buffer; + break; + } + } + negative = (*m_position == static_cast('-')); + if((negative) && (++m_position == m_end)) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + const charT* pc = m_position; + int i = this->m_traits.toi(pc, m_end, 10); + if((i < 0) && syn_end) + { + // Check for a named capture, get the leftmost one if there is more than one: + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != syn_end)) + { + ++m_position; + } + i = hash_value_from_capture_name(base, m_position); + pc = m_position; + } + if(negative) + i = 1 + m_mark_count - i; + if(((i > 0) && (this->m_backrefs & (1u << (i-1)))) || ((i > 10000) && (this->m_pdata->get_id(i) > 0) && (this->m_backrefs & (1u << (this->m_pdata->get_id(i)-1))))) + { + m_position = pc; + re_brace* pb = static_cast(this->append_state(syntax_element_backref, sizeof(re_brace))); + pb->index = i; + pb->icase = this->flags() & regbase::icase; + } + else + { + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + m_position = pc; + if(have_brace) + { + if((m_position == m_end) || (this->m_traits.syntax_type(*m_position) != syn_end)) + { + fail(regex_constants::error_escape, m_position - m_base, incomplete_message); + return false; + } + ++m_position; + } + return true; + } + goto escape_type_class_jump; + case regex_constants::escape_type_control_v: + if(0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + goto escape_type_class_jump; + // fallthrough: + default: + this->append_literal(unescape_character()); + break; + } + return true; +} + +template +bool basic_regex_parser::parse_match_any() +{ + // + // we have a '.' that can match any character: + // + ++m_position; + static_cast( + this->append_state(syntax_element_wild, sizeof(re_dot)) + )->mask = static_cast(this->flags() & regbase::no_mod_s + ? re_detail::force_not_newline + : this->flags() & regbase::mod_s ? + re_detail::force_newline : re_detail::dont_care); + return true; +} + +template +bool basic_regex_parser::parse_repeat(std::size_t low, std::size_t high) +{ + bool greedy = true; + bool pocessive = false; + std::size_t insert_point; + // + // when we get to here we may have a non-greedy ? mark still to come: + // + if((m_position != m_end) + && ( + (0 == (this->flags() & (regbase::main_option_type | regbase::no_perl_ex))) + || ((regbase::basic_syntax_group|regbase::emacs_ex) == (this->flags() & (regbase::main_option_type | regbase::emacs_ex))) + ) + ) + { + // OK we have a perl or emacs regex, check for a '?': + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_question) + { + greedy = false; + ++m_position; + } + // for perl regexes only check for pocessive ++ repeats. + if((0 == (this->flags() & regbase::main_option_type)) + && (this->m_traits.syntax_type(*m_position) == regex_constants::syntax_plus)) + { + pocessive = true; + ++m_position; + } + } + if(0 == this->m_last_state) + { + fail(regex_constants::error_badrepeat, ::cutl_details_boost::re_detail::distance(m_base, m_position), "Nothing to repeat."); + return false; + } + if(this->m_last_state->type == syntax_element_endmark) + { + // insert a repeat before the '(' matching the last ')': + insert_point = this->m_paren_start; + } + else if((this->m_last_state->type == syntax_element_literal) && (static_cast(this->m_last_state)->length > 1)) + { + // the last state was a literal with more than one character, split it in two: + re_literal* lit = static_cast(this->m_last_state); + charT c = (static_cast(static_cast(lit+1)))[lit->length - 1]; + --(lit->length); + // now append new state: + lit = static_cast(this->append_state(syntax_element_literal, sizeof(re_literal) + sizeof(charT))); + lit->length = 1; + (static_cast(static_cast(lit+1)))[0] = c; + insert_point = this->getoffset(this->m_last_state); + } + else + { + // repeat the last state whatever it was, need to add some error checking here: + switch(this->m_last_state->type) + { + case syntax_element_start_line: + case syntax_element_end_line: + case syntax_element_word_boundary: + case syntax_element_within_word: + case syntax_element_word_start: + case syntax_element_word_end: + case syntax_element_buffer_start: + case syntax_element_buffer_end: + case syntax_element_alt: + case syntax_element_soft_buffer_end: + case syntax_element_restart_continue: + case syntax_element_jump: + case syntax_element_startmark: + case syntax_element_backstep: + // can't legally repeat any of the above: + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + default: + // do nothing... + break; + } + insert_point = this->getoffset(this->m_last_state); + } + // + // OK we now know what to repeat, so insert the repeat around it: + // + re_repeat* rep = static_cast(this->insert_state(insert_point, syntax_element_rep, re_repeater_size)); + rep->min = low; + rep->max = high; + rep->greedy = greedy; + rep->leading = false; + // store our repeater position for later: + std::ptrdiff_t rep_off = this->getoffset(rep); + // and append a back jump to the repeat: + re_jump* jmp = static_cast(this->append_state(syntax_element_jump, sizeof(re_jump))); + jmp->alt.i = rep_off - this->getoffset(jmp); + this->m_pdata->m_data.align(); + // now fill in the alt jump for the repeat: + rep = static_cast(this->getaddress(rep_off)); + rep->alt.i = this->m_pdata->m_data.size() - rep_off; + // + // If the repeat is pocessive then bracket the repeat with a (?>...) + // independent sub-expression construct: + // + if(pocessive) + { + if(m_position != m_end) + { + // + // Check for illegal following quantifier, we have to do this here, because + // the extra states we insert below circumvents are usual error checking :-( + // + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_star: + case regex_constants::syntax_plus: + case regex_constants::syntax_question: + fail(regex_constants::error_badrepeat, m_position - m_base); + return false; + } + } + re_brace* pb = static_cast(this->insert_state(insert_point, syntax_element_startmark, sizeof(re_brace))); + pb->index = -3; + pb->icase = this->flags() & regbase::icase; + jmp = static_cast(this->insert_state(insert_point + sizeof(re_brace), syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp); + pb = static_cast(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = -3; + pb->icase = this->flags() & regbase::icase; + } + return true; +} + +template +bool basic_regex_parser::parse_repeat_range(bool isbasic) +{ + static const char* incomplete_message = "Missing } in quantified repetition."; + // + // parse a repeat-range: + // + std::size_t min, max; + int v; + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + // fail if at end: + if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // get min: + v = this->m_traits.toi(m_position, m_end, 10); + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + if(v < 0) + { + fail(regex_constants::error_badbrace, this->m_position - this->m_base); + return false; + } + else if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + min = v; + // see if we have a comma: + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_comma) + { + // move on and error check: + ++m_position; + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // get the value if any: + v = this->m_traits.toi(m_position, m_end, 10); + max = (v >= 0) ? v : (std::numeric_limits::max)(); + } + else + { + // no comma, max = min: + max = min; + } + // skip whitespace: + while((m_position != m_end) && this->m_traits.isctype(*m_position, this->m_mask_space)) + ++m_position; + // OK now check trailing }: + if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + if(isbasic) + { + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_escape) + { + ++m_position; + if(this->m_position == this->m_end) + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + } + else + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_brace) + ++m_position; + else + { + fail(regex_constants::error_brace, this->m_position - this->m_base, incomplete_message); + return false; + } + // + // finally go and add the repeat, unless error: + // + if(min > max) + { + // Backtrack to error location: + m_position -= 2; + while(this->m_traits.isctype(*m_position, this->m_word_mask)) --m_position; + ++m_position; + fail(regex_constants::error_badbrace, m_position - m_base); + return false; + } + return parse_repeat(min, max); +} + +template +bool basic_regex_parser::parse_alt() +{ + // + // error check: if there have been no previous states, + // or if the last state was a '(' then error: + // + if( + ((this->m_last_state == 0) || (this->m_last_state->type == syntax_element_startmark)) + && + !( + ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) + && + ((this->flags() & regbase::no_empty_expressions) == 0) + ) + ) + { + fail(regex_constants::error_empty, this->m_position - this->m_base, "A regular expression can start with the alternation operator |."); + return false; + } + // + // Reset mark count if required: + // + if(m_max_mark < m_mark_count) + m_max_mark = m_mark_count; + if(m_mark_reset >= 0) + m_mark_count = m_mark_reset; + + ++m_position; + // + // we need to append a trailing jump: + // + re_syntax_base* pj = this->append_state(re_detail::syntax_element_jump, sizeof(re_jump)); + std::ptrdiff_t jump_offset = this->getoffset(pj); + // + // now insert the alternative: + // + re_alt* palt = static_cast(this->insert_state(this->m_alt_insert_point, syntax_element_alt, re_alt_size)); + jump_offset += re_alt_size; + this->m_pdata->m_data.align(); + palt->alt.i = this->m_pdata->m_data.size() - this->getoffset(palt); + // + // update m_alt_insert_point so that the next alternate gets + // inserted at the start of the second of the two we've just created: + // + this->m_alt_insert_point = this->m_pdata->m_data.size(); + // + // the start of this alternative must have a case changes state + // if the current block has messed around with case changes: + // + if(m_has_case_change) + { + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = this->m_icase; + } + // + // push the alternative onto our stack, a recursive + // implementation here is easier to understand (and faster + // as it happens), but causes all kinds of stack overflow problems + // on programs with small stacks (COM+). + // + m_alt_jumps.push_back(jump_offset); + return true; +} + +template +bool basic_regex_parser::parse_set() +{ + static const char* incomplete_message = "Character set declaration starting with [ terminated prematurely - either no ] was found or the set had no content."; + ++m_position; + if(m_position == m_end) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + basic_char_set char_set; + + const charT* base = m_position; // where the '[' was + const charT* item_base = m_position; // where the '[' or '^' was + + while(m_position != m_end) + { + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_caret: + if(m_position == base) + { + char_set.negate(); + ++m_position; + item_base = m_position; + } + else + parse_set_literal(char_set); + break; + case regex_constants::syntax_close_set: + if(m_position == item_base) + { + parse_set_literal(char_set); + break; + } + else + { + ++m_position; + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + } + return true; + case regex_constants::syntax_open_set: + if(parse_inner_set(char_set)) + break; + return true; + case regex_constants::syntax_escape: + { + // + // look ahead and see if this is a character class shortcut + // \d \w \s etc... + // + ++m_position; + if(this->m_traits.escape_syntax_type(*m_position) + == regex_constants::escape_type_class) + { + char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + char_set.add_class(m); + ++m_position; + break; + } + } + else if(this->m_traits.escape_syntax_type(*m_position) + == regex_constants::escape_type_not_class) + { + // negated character class: + char_class_type m = this->m_traits.lookup_classname(m_position, m_position+1); + if(m != 0) + { + char_set.add_negated_class(m); + ++m_position; + break; + } + } + // not a character class, just a regular escape: + --m_position; + parse_set_literal(char_set); + break; + } + default: + parse_set_literal(char_set); + break; + } + } + return m_position != m_end; +} + +template +bool basic_regex_parser::parse_inner_set(basic_char_set& char_set) +{ + static const char* incomplete_message = "Character class declaration starting with [ terminated prematurely - either no ] was found or the set had no content."; + // + // we have either a character class [:name:] + // a collating element [.name.] + // or an equivalence class [=name=] + // + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_dot: + // + // a collating element is treated as a literal: + // + --m_position; + parse_set_literal(char_set); + return true; + case regex_constants::syntax_colon: + { + // check that character classes are actually enabled: + if((this->flags() & (regbase::main_option_type | regbase::no_char_classes)) + == (regbase::basic_syntax_group | regbase::no_char_classes)) + { + --m_position; + parse_set_literal(char_set); + return true; + } + // skip the ':' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching ':]' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_colon)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + // + // check for negated class: + // + bool negated = false; + if(this->m_traits.syntax_type(*name_first) == regex_constants::syntax_caret) + { + ++name_first; + negated = true; + } + typedef typename traits::char_class_type mask_type; + mask_type m = this->m_traits.lookup_classname(name_first, name_last); + if(m == 0) + { + if(char_set.empty() && (name_last - name_first == 1)) + { + // maybe a special case: + ++m_position; + if( (m_position != m_end) + && (this->m_traits.syntax_type(*m_position) + == regex_constants::syntax_close_set)) + { + if(this->m_traits.escape_syntax_type(*name_first) + == regex_constants::escape_type_left_word) + { + ++m_position; + this->append_state(syntax_element_word_start); + return false; + } + if(this->m_traits.escape_syntax_type(*name_first) + == regex_constants::escape_type_right_word) + { + ++m_position; + this->append_state(syntax_element_word_end); + return false; + } + } + } + fail(regex_constants::error_ctype, name_first - m_base); + return false; + } + if(negated == false) + char_set.add_class(m); + else + char_set.add_negated_class(m); + ++m_position; + break; + } + case regex_constants::syntax_equal: + { + // skip the '=' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching '=]' + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_brack, m_position - m_base, incomplete_message); + return false; + } + string_type m = this->m_traits.lookup_collatename(name_first, name_last); + if((0 == m.size()) || (m.size() > 2)) + { + fail(regex_constants::error_collate, name_first - m_base); + return false; + } + digraph d; + d.first = m[0]; + if(m.size() > 1) + d.second = m[1]; + else + d.second = 0; + char_set.add_equivalent(d); + ++m_position; + break; + } + default: + --m_position; + parse_set_literal(char_set); + break; + } + return true; +} + +template +void basic_regex_parser::parse_set_literal(basic_char_set& char_set) +{ + digraph start_range(get_next_set_literal(char_set)); + if(m_end == m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) + { + // we have a range: + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set) + { + digraph end_range = get_next_set_literal(char_set); + char_set.add_range(start_range, end_range); + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_dash) + { + if(m_end == ++m_position) + { + fail(regex_constants::error_brack, m_position - m_base); + return; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_set) + { + // trailing - : + --m_position; + return; + } + fail(regex_constants::error_range, m_position - m_base); + return; + } + return; + } + --m_position; + } + char_set.add_single(start_range); +} + +template +digraph basic_regex_parser::get_next_set_literal(basic_char_set& char_set) +{ + digraph result; + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_dash: + if(!char_set.empty()) + { + // see if we are at the end of the set: + if((++m_position == m_end) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_range, m_position - m_base); + return result; + } + --m_position; + } + result.first = *m_position++; + return result; + case regex_constants::syntax_escape: + // check to see if escapes are supported first: + if(this->flags() & regex_constants::no_escape_in_lists) + { + result = *m_position++; + break; + } + ++m_position; + result = unescape_character(); + break; + case regex_constants::syntax_open_set: + { + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, m_position - m_base); + return result; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot) + { + --m_position; + result.first = *m_position; + ++m_position; + return result; + } + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, m_position - m_base); + return result; + } + const charT* name_first = m_position; + // skip at least one character, then find the matching ':]' + if(m_end == ++m_position) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_dot)) + ++m_position; + const charT* name_last = m_position; + if(m_end == m_position) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + if((m_end == ++m_position) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_set)) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + ++m_position; + string_type s = this->m_traits.lookup_collatename(name_first, name_last); + if(s.empty() || (s.size() > 2)) + { + fail(regex_constants::error_collate, name_first - m_base); + return result; + } + result.first = s[0]; + if(s.size() > 1) + result.second = s[1]; + else + result.second = 0; + return result; + } + default: + result = *m_position++; + } + return result; +} + +// +// does a value fit in the specified charT type? +// +template +bool valid_value(charT, int v, const mpl::true_&) +{ + return (v >> (sizeof(charT) * CHAR_BIT)) == 0; +} +template +bool valid_value(charT, int, const mpl::false_&) +{ + return true; // v will alsways fit in a charT +} +template +bool valid_value(charT c, int v) +{ + return valid_value(c, v, mpl::bool_<(sizeof(charT) < sizeof(int))>()); +} + +template +charT basic_regex_parser::unescape_character() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + charT result(0); + if(m_position == m_end) + { + fail(regex_constants::error_escape, m_position - m_base, "Escape sequence terminated prematurely."); + return false; + } + switch(this->m_traits.escape_syntax_type(*m_position)) + { + case regex_constants::escape_type_control_a: + result = charT('\a'); + break; + case regex_constants::escape_type_e: + result = charT(27); + break; + case regex_constants::escape_type_control_f: + result = charT('\f'); + break; + case regex_constants::escape_type_control_n: + result = charT('\n'); + break; + case regex_constants::escape_type_control_r: + result = charT('\r'); + break; + case regex_constants::escape_type_control_t: + result = charT('\t'); + break; + case regex_constants::escape_type_control_v: + result = charT('\v'); + break; + case regex_constants::escape_type_word_assert: + result = charT('\b'); + break; + case regex_constants::escape_type_ascii_control: + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "ASCII escape sequence terminated prematurely."); + return result; + } + result = static_cast(*m_position % 32); + break; + case regex_constants::escape_type_hex: + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Hexadecimal escape sequence terminated prematurely."); + return result; + } + // maybe have \x{ddd} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Missing } in hexadecimal escape sequence."); + return result; + } + int i = this->m_traits.toi(m_position, m_end, 16); + if((m_position == m_end) + || (i < 0) + || ((std::numeric_limits::is_specialized) && (i > (int)(std::numeric_limits::max)())) + || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_badbrace, m_position - m_base, "Hexadecimal escape sequence was invalid."); + return result; + } + ++m_position; + result = charT(i); + } + else + { + std::ptrdiff_t len = (std::min)(static_cast(2), static_cast(m_end - m_position)); + int i = this->m_traits.toi(m_position, m_position + len, 16); + if((i < 0) + || !valid_value(charT(0), i)) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Escape sequence did not encode a valid character."); + return result; + } + result = charT(i); + } + return result; + case regex_constants::syntax_digit: + { + // an octal escape sequence, the first character must be a zero + // followed by up to 3 octal digits: + std::ptrdiff_t len = (std::min)(::cutl_details_boost::re_detail::distance(m_position, m_end), static_cast(4)); + const charT* bp = m_position; + int val = this->m_traits.toi(bp, bp + 1, 8); + if(val != 0) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + // Oops not an octal escape after all: + fail(regex_constants::error_escape, m_position - m_base, "Invalid octal escape sequence."); + return result; + } + val = this->m_traits.toi(m_position, m_position + len, 8); + if(val < 0) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base, "Octal escape sequence is invalid."); + return result; + } + return static_cast(val); + } + case regex_constants::escape_type_named_char: + { + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + // maybe have \N{name} + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_open_brace) + { + const charT* base = m_position; + // skip forward until we find enclosing brace: + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_brace)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + string_type s = this->m_traits.lookup_collatename(++base, m_position++); + if(s.empty()) + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_collate, m_position - m_base); + return false; + } + if(s.size() == 1) + { + return s[0]; + } + } + // fall through is a failure: + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + default: + result = *m_position; + break; + } + ++m_position; + return result; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool basic_regex_parser::parse_backref() +{ + BOOST_ASSERT(m_position != m_end); + const charT* pc = m_position; + int i = this->m_traits.toi(pc, pc + 1, 10); + if((i == 0) || (((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) && (this->flags() & regbase::no_bk_refs))) + { + // not a backref at all but an octal escape sequence: + charT c = unescape_character(); + this->append_literal(c); + } + else if((i > 0) && (this->m_backrefs & (1u << (i-1)))) + { + m_position = pc; + re_brace* pb = static_cast(this->append_state(syntax_element_backref, sizeof(re_brace))); + pb->index = i; + pb->icase = this->flags() & regbase::icase; + } + else + { + // Rewind to start of escape: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_backref, m_position - m_base); + return false; + } + return true; +} + +template +bool basic_regex_parser::parse_QE() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + // + // parse a \Q...\E sequence: + // + ++m_position; // skip the Q + const charT* start = m_position; + const charT* end; + do + { + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape)) + ++m_position; + if(m_position == m_end) + { + // a \Q...\E sequence may terminate with the end of the expression: + end = m_position; + break; + } + if(++m_position == m_end) // skip the escape + { + fail(regex_constants::error_escape, m_position - m_base, "Unterminated \\Q...\\E sequence."); + return false; + } + // check to see if it's a \E: + if(this->m_traits.escape_syntax_type(*m_position) == regex_constants::escape_type_E) + { + ++m_position; + end = m_position - 2; + break; + } + // otherwise go round again: + }while(true); + // + // now add all the character between the two escapes as literals: + // + while(start != end) + { + this->append_literal(*start); + ++start; + } + return true; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool basic_regex_parser::parse_perl_extension() +{ + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + // + // treat comments as a special case, as these + // are the only ones that don't start with a leading + // startmark state: + // + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_hash) + { + while((m_position != m_end) + && (this->m_traits.syntax_type(*m_position++) != regex_constants::syntax_close_mark)) + {} + return true; + } + // + // backup some state, and prepare the way: + // + int markid = 0; + std::ptrdiff_t jump_offset = 0; + re_brace* pb = static_cast(this->append_state(syntax_element_startmark, sizeof(re_brace))); + pb->icase = this->flags() & regbase::icase; + std::ptrdiff_t last_paren_start = this->getoffset(pb); + // back up insertion point for alternations, and set new point: + std::ptrdiff_t last_alt_point = m_alt_insert_point; + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + std::ptrdiff_t expected_alt_point = m_alt_insert_point; + bool restore_flags = true; + regex_constants::syntax_option_type old_flags = this->flags(); + bool old_case_change = m_has_case_change; + m_has_case_change = false; + charT name_delim; + int mark_reset = m_mark_reset; + int max_mark = m_max_mark; + m_mark_reset = -1; + m_max_mark = m_mark_count; + int v; + // + // select the actual extension used: + // + switch(this->m_traits.syntax_type(*m_position)) + { + case regex_constants::syntax_or: + m_mark_reset = m_mark_count; + // fall through: + case regex_constants::syntax_colon: + // + // a non-capturing mark: + // + pb->index = markid = 0; + ++m_position; + break; + case regex_constants::syntax_digit: + { + // + // a recursive subexpression: + // + v = this->m_traits.toi(m_position, m_end, 10); + if((v < 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "The recursive sub-expression refers to an invalid marking group, or is unterminated."); + return false; + } +insert_recursion: + pb->index = markid = 0; + re_recurse* pr = static_cast(this->append_state(syntax_element_recurse, sizeof(re_recurse))); + pr->alt.i = v; + pr->state_id = 0; + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = this->flags() & regbase::icase; + break; + } + case regex_constants::syntax_plus: + // + // A forward-relative recursive subexpression: + // + ++m_position; + v = this->m_traits.toi(m_position, m_end, 10); + if((v <= 0) || (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression."); + return false; + } + v += m_mark_count; + goto insert_recursion; + case regex_constants::syntax_dash: + // + // Possibly a backward-relative recursive subexpression: + // + ++m_position; + v = this->m_traits.toi(m_position, m_end, 10); + if(v <= 0) + { + --m_position; + // Oops not a relative recursion at all, but a (?-imsx) group: + goto option_group_jump; + } + v = m_mark_count + 1 - v; + if(v <= 0) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "An invalid or unterminated recursive sub-expression."); + return false; + } + goto insert_recursion; + case regex_constants::syntax_equal: + pb->index = markid = -1; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::syntax_not: + pb->index = markid = -2; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::escape_type_left_word: + { + // a lookbehind assertion: + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + regex_constants::syntax_type t = this->m_traits.syntax_type(*m_position); + if(t == regex_constants::syntax_not) + pb->index = markid = -2; + else if(t == regex_constants::syntax_equal) + pb->index = markid = -1; + else + { + // Probably a named capture which also starts (?< : + name_delim = '>'; + --m_position; + goto named_capture_jump; + } + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->append_state(syntax_element_backstep, sizeof(re_brace)); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + } + case regex_constants::escape_type_right_word: + // + // an independent sub-expression: + // + pb->index = markid = -3; + ++m_position; + jump_offset = this->getoffset(this->append_state(syntax_element_jump, sizeof(re_jump))); + this->m_pdata->m_data.align(); + m_alt_insert_point = this->m_pdata->m_data.size(); + break; + case regex_constants::syntax_open_mark: + { + // a conditional expression: + pb->index = markid = -4; + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = this->m_traits.toi(m_position, m_end, 10); + if(*m_position == charT('R')) + { + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(*m_position == charT('&')) + { + const charT* base = ++m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = -static_cast(hash_value_from_capture_name(base, m_position)); + } + else + { + v = -this->m_traits.toi(m_position, m_end, 10); + } + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v < 0 ? (v - 1) : 0; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if((*m_position == charT('\'')) || (*m_position == charT('<'))) + { + const charT* base = ++m_position; + while((m_position != m_end) && (*m_position != charT('>')) && (*m_position != charT('\''))) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v; + if(((*m_position != charT('>')) && (*m_position != charT('\''))) || (++m_position == m_end)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Unterminated named capture."); + return false; + } + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if(*m_position == charT('D')) + { + const char* def = "DEFINE"; + while(*def && (m_position != m_end) && (*m_position == charT(*def))) + ++m_position, ++def; + if((m_position == m_end) || *def) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = 9999; // special magic value! + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else if(v > 0) + { + re_brace* br = static_cast(this->append_state(syntax_element_assert_backref, sizeof(re_brace))); + br->index = v; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + } + else + { + // verify that we have a lookahead or lookbehind assert: + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_question) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(this->m_traits.syntax_type(*m_position) == regex_constants::escape_type_left_word) + { + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + m_position -= 3; + } + else + { + if((this->m_traits.syntax_type(*m_position) != regex_constants::syntax_equal) + && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_not)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + m_position -= 2; + } + } + break; + } + case regex_constants::syntax_close_mark: + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + case regex_constants::escape_type_end_buffer: + { + name_delim = *m_position; +named_capture_jump: + markid = 0; + if(0 == (this->flags() & regbase::nosubs)) + { + markid = ++m_mark_count; + #ifndef BOOST_NO_STD_DISTANCE + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair(std::distance(m_base, m_position) - 2, 0)); + #else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.push_back(std::pair((m_position - m_base) - 2, 0)); + #endif + } + pb->index = markid; + const charT* base = ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + while((m_position != m_end) && (*m_position != name_delim)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + this->m_pdata->set_name(base, m_position, markid); + ++m_position; + break; + } + default: + if(*m_position == charT('R')) + { + ++m_position; + v = 0; + if(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + goto insert_recursion; + } + if(*m_position == charT('&')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + if(*m_position == charT('P')) + { + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + if(*m_position == charT('>')) + { + ++m_position; + const charT* base = m_position; + while((m_position != m_end) && (this->m_traits.syntax_type(*m_position) != regex_constants::syntax_close_mark)) + ++m_position; + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + v = static_cast(hash_value_from_capture_name(base, m_position)); + goto insert_recursion; + } + } + // + // lets assume that we have a (?imsx) group and try and parse it: + // +option_group_jump: + regex_constants::syntax_option_type opts = parse_options(); + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + // make a note of whether we have a case change: + m_has_case_change = ((opts & regbase::icase) != (this->flags() & regbase::icase)); + pb->index = markid = 0; + if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark) + { + // update flags and carry on as normal: + this->flags(opts); + restore_flags = false; + old_case_change |= m_has_case_change; // defer end of scope by one ')' + } + else if(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_colon) + { + // update flags and carry on until the matching ')' is found: + this->flags(opts); + ++m_position; + } + else + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base); + return false; + } + + // finally append a case change state if we need it: + if(m_has_case_change) + { + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = opts & regbase::icase; + } + + } + // + // now recursively add more states, this will terminate when we get to a + // matching ')' : + // + parse_all(); + // + // Unwind alternatives: + // + if(0 == unwind_alts(last_paren_start)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid alternation operators within (?...) block."); + return false; + } + // + // we either have a ')' or we have run out of characters prematurely: + // + if(m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + this->fail(regex_constants::error_paren, ::cutl_details_boost::re_detail::distance(m_base, m_end)); + return false; + } + BOOST_ASSERT(this->m_traits.syntax_type(*m_position) == regex_constants::syntax_close_mark); + ++m_position; + // + // restore the flags: + // + if(restore_flags) + { + // append a case change state if we need it: + if(m_has_case_change) + { + static_cast( + this->append_state(syntax_element_toggle_case, sizeof(re_case)) + )->icase = old_flags & regbase::icase; + } + this->flags(old_flags); + } + // + // set up the jump pointer if we have one: + // + if(jump_offset) + { + this->m_pdata->m_data.align(); + re_jump* jmp = static_cast(this->getaddress(jump_offset)); + jmp->alt.i = this->m_pdata->m_data.size() - this->getoffset(jmp); + if(this->m_last_state == jmp) + { + // Oops... we didn't have anything inside the assertion: + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_perl_extension, m_position - m_base, "Invalid or empty zero width assertion."); + return false; + } + } + // + // verify that if this is conditional expression, that we do have + // an alternative, if not add one: + // + if(markid == -4) + { + re_syntax_base* b = this->getaddress(expected_alt_point); + // Make sure we have exactly one alternative following this state: + if(b->type != syntax_element_alt) + { + re_alt* alt = static_cast(this->insert_state(expected_alt_point, syntax_element_alt, sizeof(re_alt))); + alt->alt.i = this->m_pdata->m_data.size() - this->getoffset(alt); + } + else if(this->getaddress(static_cast(b)->alt.i, b)->type == syntax_element_alt) + { + // Can't have seen more than one alternative: + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_bad_pattern, m_position - m_base, "More than one alternation operator | was encountered inside a conditional expression."); + return false; + } + else + { + // We must *not* have seen an alternative inside a (DEFINE) block: + b = this->getaddress(b->next.i, b); + if((b->type == syntax_element_assert_backref) && (static_cast(b)->index == 9999)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_bad_pattern, m_position - m_base, "Alternation operators are not allowed inside a DEFINE block."); + return false; + } + } + // check for invalid repetition of next state: + b = this->getaddress(expected_alt_point); + b = this->getaddress(static_cast(b)->next.i, b); + if((b->type != syntax_element_assert_backref) + && (b->type != syntax_element_startmark)) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_badrepeat, m_position - m_base, "A repetition operator cannot be applied to a zero-width assertion."); + return false; + } + } + // + // append closing parenthesis state: + // + pb = static_cast(this->append_state(syntax_element_endmark, sizeof(re_brace))); + pb->index = markid; + pb->icase = this->flags() & regbase::icase; + this->m_paren_start = last_paren_start; + // + // restore the alternate insertion point: + // + this->m_alt_insert_point = last_alt_point; + // + // and the case change data: + // + m_has_case_change = old_case_change; + // + // And the mark_reset data: + // + if(m_max_mark > m_mark_count) + { + m_mark_count = m_max_mark; + } + m_mark_reset = mark_reset; + m_max_mark = max_mark; + + + if(markid > 0) + { +#ifndef BOOST_NO_STD_DISTANCE + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.at(markid - 1).second = std::distance(m_base, m_position) - 1; +#else + if(this->flags() & regbase::save_subexpression_location) + this->m_pdata->m_subs.at(markid - 1).second = (m_position - m_base) - 1; +#endif + // + // allow backrefs to this mark: + // + if((markid > 0) && (markid < (int)(sizeof(unsigned) * CHAR_BIT))) + this->m_backrefs |= 1u << (markid - 1); + } + return true; +} + +template +bool basic_regex_parser::add_emacs_code(bool negate) +{ + // + // parses an emacs style \sx or \Sx construct. + // + if(++m_position == m_end) + { + // Rewind to start of sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_escape) --m_position; + fail(regex_constants::error_escape, m_position - m_base); + return false; + } + basic_char_set char_set; + if(negate) + char_set.negate(); + + static const charT s_punct[5] = { 'p', 'u', 'n', 'c', 't', }; + + switch(*m_position) + { + case 's': + case ' ': + char_set.add_class(this->m_mask_space); + break; + case 'w': + char_set.add_class(this->m_word_mask); + break; + case '_': + char_set.add_single(digraph(charT('$'))); + char_set.add_single(digraph(charT('&'))); + char_set.add_single(digraph(charT('*'))); + char_set.add_single(digraph(charT('+'))); + char_set.add_single(digraph(charT('-'))); + char_set.add_single(digraph(charT('_'))); + char_set.add_single(digraph(charT('<'))); + char_set.add_single(digraph(charT('>'))); + break; + case '.': + char_set.add_class(this->m_traits.lookup_classname(s_punct, s_punct+5)); + break; + case '(': + char_set.add_single(digraph(charT('('))); + char_set.add_single(digraph(charT('['))); + char_set.add_single(digraph(charT('{'))); + break; + case ')': + char_set.add_single(digraph(charT(')'))); + char_set.add_single(digraph(charT(']'))); + char_set.add_single(digraph(charT('}'))); + break; + case '"': + char_set.add_single(digraph(charT('"'))); + char_set.add_single(digraph(charT('\''))); + char_set.add_single(digraph(charT('`'))); + break; + case '\'': + char_set.add_single(digraph(charT('\''))); + char_set.add_single(digraph(charT(','))); + char_set.add_single(digraph(charT('#'))); + break; + case '<': + char_set.add_single(digraph(charT(';'))); + break; + case '>': + char_set.add_single(digraph(charT('\n'))); + char_set.add_single(digraph(charT('\f'))); + break; + default: + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + if(0 == this->append_set(char_set)) + { + fail(regex_constants::error_ctype, m_position - m_base); + return false; + } + ++m_position; + return true; +} + +template +regex_constants::syntax_option_type basic_regex_parser::parse_options() +{ + // we have a (?imsx-imsx) group, convert it into a set of flags: + regex_constants::syntax_option_type f = this->flags(); + bool breakout = false; + do + { + switch(*m_position) + { + case 's': + f |= regex_constants::mod_s; + f &= ~regex_constants::no_mod_s; + break; + case 'm': + f &= ~regex_constants::no_mod_m; + break; + case 'i': + f |= regex_constants::icase; + break; + case 'x': + f |= regex_constants::mod_x; + break; + default: + breakout = true; + continue; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + } + while(!breakout); + + if(*m_position == static_cast('-')) + { + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + do + { + switch(*m_position) + { + case 's': + f &= ~regex_constants::mod_s; + f |= regex_constants::no_mod_s; + break; + case 'm': + f |= regex_constants::no_mod_m; + break; + case 'i': + f &= ~regex_constants::icase; + break; + case 'x': + f &= ~regex_constants::mod_x; + break; + default: + breakout = true; + continue; + } + if(++m_position == m_end) + { + // Rewind to start of (? sequence: + --m_position; + while(this->m_traits.syntax_type(*m_position) != regex_constants::syntax_open_mark) --m_position; + fail(regex_constants::error_paren, m_position - m_base); + return false; + } + } + while(!breakout); + } + return f; +} + +template +bool basic_regex_parser::unwind_alts(std::ptrdiff_t last_paren_start) +{ + // + // If we didn't actually add any states after the last + // alternative then that's an error: + // + if((this->m_alt_insert_point == static_cast(this->m_pdata->m_data.size())) + && m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start) + && + !( + ((this->flags() & regbase::main_option_type) == regbase::perl_syntax_group) + && + ((this->flags() & regbase::no_empty_expressions) == 0) + ) + ) + { + fail(regex_constants::error_empty, this->m_position - this->m_base, "Can't terminate a sub-expression with an alternation operator |."); + return false; + } + // + // Fix up our alternatives: + // + while(m_alt_jumps.size() && (m_alt_jumps.back() > last_paren_start)) + { + // + // fix up the jump to point to the end of the states + // that we've just added: + // + std::ptrdiff_t jump_offset = m_alt_jumps.back(); + m_alt_jumps.pop_back(); + this->m_pdata->m_data.align(); + re_jump* jmp = static_cast(this->getaddress(jump_offset)); + BOOST_ASSERT(jmp->type == syntax_element_jump); + jmp->alt.i = this->m_pdata->m_data.size() - jump_offset; + } + return true; +} + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace re_detail +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif diff --git a/cutl/details/boost/regex/v4/c_regex_traits.hpp b/cutl/details/boost/regex/v4/c_regex_traits.hpp new file mode 100644 index 0000000..d4f4c30 --- /dev/null +++ b/cutl/details/boost/regex/v4/c_regex_traits.hpp @@ -0,0 +1,211 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE c_regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits class that wraps the global C locale. + */ + +#ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_C_REGEX_TRAITS_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif +#ifndef BOOST_REGEX_WORKAROUND_HPP +#include +#endif + +#include + +#ifdef BOOST_NO_STDC_NAMESPACE +namespace std{ + using ::strlen; using ::tolower; +} +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ + +template +struct c_regex_traits; + +template<> +struct BOOST_REGEX_DECL c_regex_traits +{ + c_regex_traits(){} + typedef char char_type; + typedef std::size_t size_type; + typedef std::string string_type; + struct locale_type{}; + typedef cutl_details_boost::uint32_t char_class_type; + + static size_type length(const char_type* p) + { + return (std::strlen)(p); + } + + char translate(char c) const + { + return c; + } + char translate_nocase(char c) const + { + return static_cast((std::tolower)(static_cast(c))); + } + + static string_type BOOST_REGEX_CALL transform(const char* p1, const char* p2); + static string_type BOOST_REGEX_CALL transform_primary(const char* p1, const char* p2); + + static char_class_type BOOST_REGEX_CALL lookup_classname(const char* p1, const char* p2); + static string_type BOOST_REGEX_CALL lookup_collatename(const char* p1, const char* p2); + + static bool BOOST_REGEX_CALL isctype(char, char_class_type); + static int BOOST_REGEX_CALL value(char, int); + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return locale_type(); } + +private: + // this type is not copyable: + c_regex_traits(const c_regex_traits&); + c_regex_traits& operator=(const c_regex_traits&); +}; + +#ifndef BOOST_NO_WREGEX +template<> +struct BOOST_REGEX_DECL c_regex_traits +{ + c_regex_traits(){} + typedef wchar_t char_type; + typedef std::size_t size_type; + typedef std::wstring string_type; + struct locale_type{}; + typedef cutl_details_boost::uint32_t char_class_type; + + static size_type length(const char_type* p) + { + return (std::wcslen)(p); + } + + wchar_t translate(wchar_t c) const + { + return c; + } + wchar_t translate_nocase(wchar_t c) const + { + return (std::towlower)(c); + } + + static string_type BOOST_REGEX_CALL transform(const wchar_t* p1, const wchar_t* p2); + static string_type BOOST_REGEX_CALL transform_primary(const wchar_t* p1, const wchar_t* p2); + + static char_class_type BOOST_REGEX_CALL lookup_classname(const wchar_t* p1, const wchar_t* p2); + static string_type BOOST_REGEX_CALL lookup_collatename(const wchar_t* p1, const wchar_t* p2); + + static bool BOOST_REGEX_CALL isctype(wchar_t, char_class_type); + static int BOOST_REGEX_CALL value(wchar_t, int); + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return locale_type(); } + +private: + // this type is not copyable: + c_regex_traits(const c_regex_traits&); + c_regex_traits& operator=(const c_regex_traits&); +}; + +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +// +// Provide an unsigned short version as well, so the user can link to this +// no matter whether they build with /Zc:wchar_t or not (MSVC specific). +// +template<> +struct BOOST_REGEX_DECL c_regex_traits +{ + c_regex_traits(){} + typedef unsigned short char_type; + typedef std::size_t size_type; + typedef std::basic_string string_type; + struct locale_type{}; + typedef cutl_details_boost::uint32_t char_class_type; + + static size_type length(const char_type* p) + { + return (std::wcslen)((const wchar_t*)p); + } + + unsigned short translate(unsigned short c) const + { + return c; + } + unsigned short translate_nocase(unsigned short c) const + { + return (std::towlower)((wchar_t)c); + } + + static string_type BOOST_REGEX_CALL transform(const unsigned short* p1, const unsigned short* p2); + static string_type BOOST_REGEX_CALL transform_primary(const unsigned short* p1, const unsigned short* p2); + + static char_class_type BOOST_REGEX_CALL lookup_classname(const unsigned short* p1, const unsigned short* p2); + static string_type BOOST_REGEX_CALL lookup_collatename(const unsigned short* p1, const unsigned short* p2); + + static bool BOOST_REGEX_CALL isctype(unsigned short, char_class_type); + static int BOOST_REGEX_CALL value(unsigned short, int); + + locale_type imbue(locale_type l) + { return l; } + locale_type getloc()const + { return locale_type(); } + +private: + // this type is not copyable: + c_regex_traits(const c_regex_traits&); + c_regex_traits& operator=(const c_regex_traits&); +}; + +#endif + +#endif // BOOST_NO_WREGEX + +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + + + diff --git a/cutl/details/boost/regex/v4/char_regex_traits.hpp b/cutl/details/boost/regex/v4/char_regex_traits.hpp new file mode 100644 index 0000000..4bae0ec --- /dev/null +++ b/cutl/details/boost/regex/v4/char_regex_traits.hpp @@ -0,0 +1,81 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE char_regex_traits.cpp + * VERSION see + * DESCRIPTION: Declares deprecated traits classes char_regex_traits<>. + */ + + +#ifndef BOOST_REGEX_V4_CHAR_REGEX_TRAITS_HPP +#define BOOST_REGEX_V4_CHAR_REGEX_TRAITS_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ + +namespace deprecated{ +// +// class char_regex_traits_i +// provides case insensitive traits classes (deprecated): +template +class char_regex_traits_i : public regex_traits {}; + +template<> +class char_regex_traits_i : public regex_traits +{ +public: + typedef char char_type; + typedef unsigned char uchar_type; + typedef unsigned int size_type; + typedef regex_traits base_type; + +}; + +#ifndef BOOST_NO_WREGEX +template<> +class char_regex_traits_i : public regex_traits +{ +public: + typedef wchar_t char_type; + typedef unsigned short uchar_type; + typedef unsigned int size_type; + typedef regex_traits base_type; + +}; +#endif +} // namespace deprecated +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif // include + diff --git a/cutl/details/boost/regex/v4/cpp_regex_traits.hpp b/cutl/details/boost/regex/v4/cpp_regex_traits.hpp new file mode 100644 index 0000000..7281e84 --- /dev/null +++ b/cutl/details/boost/regex/v4/cpp_regex_traits.hpp @@ -0,0 +1,1088 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE cpp_regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits class cpp_regex_traits. + */ + +#ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED + +#include + +#ifndef BOOST_NO_STD_LOCALE + +#ifndef BOOST_RE_PAT_EXCEPT_HPP +#include +#endif +#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED +#include +#endif +#ifdef BOOST_HAS_THREADS +#include +#endif +#ifndef BOOST_REGEX_PRIMARY_TRANSFORM +#include +#endif +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#include +#endif + +#include +#include +#include + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4786 4251) +#endif + +namespace cutl_details_boost{ + +// +// forward declaration is needed by some compilers: +// +template +class cpp_regex_traits; + +namespace re_detail{ + +// +// class parser_buf: +// acts as a stream buffer which wraps around a pair of pointers: +// +template > +class parser_buf : public ::std::basic_streambuf +{ + typedef ::std::basic_streambuf base_type; + typedef typename base_type::int_type int_type; + typedef typename base_type::char_type char_type; + typedef typename base_type::pos_type pos_type; + typedef ::std::streamsize streamsize; + typedef typename base_type::off_type off_type; +public: + parser_buf() : base_type() { setbuf(0, 0); } + const charT* getnext() { return this->gptr(); } +protected: + std::basic_streambuf* setbuf(char_type* s, streamsize n); + typename parser_buf::pos_type seekpos(pos_type sp, ::std::ios_base::openmode which); + typename parser_buf::pos_type seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which); +private: + parser_buf& operator=(const parser_buf&); + parser_buf(const parser_buf&); +}; + +template +std::basic_streambuf* +parser_buf::setbuf(char_type* s, streamsize n) +{ + this->setg(s, s, s + n); + return this; +} + +template +typename parser_buf::pos_type +parser_buf::seekoff(off_type off, ::std::ios_base::seekdir way, ::std::ios_base::openmode which) +{ + if(which & ::std::ios_base::out) + return pos_type(off_type(-1)); + std::ptrdiff_t size = this->egptr() - this->eback(); + std::ptrdiff_t pos = this->gptr() - this->eback(); + charT* g = this->eback(); + switch(way) + { + case ::std::ios_base::beg: + if((off < 0) || (off > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + off, g + size); + break; + case ::std::ios_base::end: + if((off < 0) || (off > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + size - off, g + size); + break; + case ::std::ios_base::cur: + { + std::ptrdiff_t newpos = static_cast(pos + off); + if((newpos < 0) || (newpos > size)) + return pos_type(off_type(-1)); + else + this->setg(g, g + newpos, g + size); + break; + } + default: ; + } +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4244) +#endif + return static_cast(this->gptr() - this->eback()); +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +typename parser_buf::pos_type +parser_buf::seekpos(pos_type sp, ::std::ios_base::openmode which) +{ + if(which & ::std::ios_base::out) + return pos_type(off_type(-1)); + off_type size = static_cast(this->egptr() - this->eback()); + charT* g = this->eback(); + if(off_type(sp) <= size) + { + this->setg(g, g + off_type(sp), g + size); + } + return pos_type(off_type(-1)); +} + +// +// class cpp_regex_traits_base: +// acts as a container for locale and the facets we are using. +// +template +struct cpp_regex_traits_base +{ + cpp_regex_traits_base(const std::locale& l) + { imbue(l); } + std::locale imbue(const std::locale& l); + + std::locale m_locale; + std::ctype const* m_pctype; +#ifndef BOOST_NO_STD_MESSAGES + std::messages const* m_pmessages; +#endif + std::collate const* m_pcollate; + + bool operator<(const cpp_regex_traits_base& b)const + { + if(m_pctype == b.m_pctype) + { +#ifndef BOOST_NO_STD_MESSAGES + if(m_pmessages == b.m_pmessages) + { + return m_pcollate < b.m_pcollate; + } + return m_pmessages < b.m_pmessages; +#else + return m_pcollate < b.m_pcollate; +#endif + } + return m_pctype < b.m_pctype; + } + bool operator==(const cpp_regex_traits_base& b)const + { + return (m_pctype == b.m_pctype) +#ifndef BOOST_NO_STD_MESSAGES + && (m_pmessages == b.m_pmessages) +#endif + && (m_pcollate == b.m_pcollate); + } +}; + +template +std::locale cpp_regex_traits_base::imbue(const std::locale& l) +{ + std::locale result(m_locale); + m_locale = l; + m_pctype = &BOOST_USE_FACET(std::ctype, l); +#ifndef BOOST_NO_STD_MESSAGES + m_pmessages = BOOST_HAS_FACET(std::messages, l) ? &BOOST_USE_FACET(std::messages, l) : 0; +#endif + m_pcollate = &BOOST_USE_FACET(std::collate, l); + return result; +} + +// +// class cpp_regex_traits_char_layer: +// implements methods that require specialisation for narrow characters: +// +template +class cpp_regex_traits_char_layer : public cpp_regex_traits_base +{ + typedef std::basic_string string_type; + typedef std::map map_type; + typedef typename map_type::const_iterator map_iterator_type; +public: + cpp_regex_traits_char_layer(const std::locale& l) + : cpp_regex_traits_base(l) + { + init(); + } + cpp_regex_traits_char_layer(const cpp_regex_traits_base& b) + : cpp_regex_traits_base(b) + { + init(); + } + void init(); + + regex_constants::syntax_type syntax_type(charT c)const + { + map_iterator_type i = m_char_map.find(c); + return ((i == m_char_map.end()) ? 0 : i->second); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + map_iterator_type i = m_char_map.find(c); + if(i == m_char_map.end()) + { + if(this->m_pctype->is(std::ctype_base::lower, c)) return regex_constants::escape_type_class; + if(this->m_pctype->is(std::ctype_base::upper, c)) return regex_constants::escape_type_not_class; + return 0; + } + return i->second; + } + +private: + string_type get_default_message(regex_constants::syntax_type); + // TODO: use a hash table when available! + map_type m_char_map; +}; + +template +void cpp_regex_traits_char_layer::init() +{ + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: +#ifndef BOOST_NO_STD_MESSAGES +#ifndef __IBMCPP__ + typename std::messages::catalog cat = static_cast::catalog>(-1); +#else + typename std::messages::catalog cat = reinterpret_cast::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits::get_catalog_name()); + if(cat_name.size() && (this->m_pmessages != 0)) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + cutl_details_boost::re_detail::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if((int)cat >= 0) + { +#ifndef BOOST_NO_EXCEPTIONS + try{ +#endif + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = this->m_pmessages->get(cat, 0, i, get_default_message(i)); + for(typename string_type::size_type j = 0; j < mss.size(); ++j) + { + m_char_map[mss[j]] = i; + } + } + this->m_pmessages->close(cat); +#ifndef BOOST_NO_EXCEPTIONS + } + catch(...) + { + if(this->m_pmessages) + this->m_pmessages->close(cat); + throw; + } +#endif + } + else + { +#endif + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while(ptr && *ptr) + { + m_char_map[this->m_pctype->widen(*ptr)] = i; + ++ptr; + } + } +#ifndef BOOST_NO_STD_MESSAGES + } +#endif +} + +template +typename cpp_regex_traits_char_layer::string_type + cpp_regex_traits_char_layer::get_default_message(regex_constants::syntax_type i) +{ + const char* ptr = get_default_syntax(i); + string_type result; + while(ptr && *ptr) + { + result.append(1, this->m_pctype->widen(*ptr)); + ++ptr; + } + return result; +} + +// +// specialised version for narrow characters: +// +template <> +class BOOST_REGEX_DECL cpp_regex_traits_char_layer : public cpp_regex_traits_base +{ + typedef std::string string_type; +public: + cpp_regex_traits_char_layer(const std::locale& l) + : cpp_regex_traits_base(l) + { + init(); + } + cpp_regex_traits_char_layer(const cpp_regex_traits_base& l) + : cpp_regex_traits_base(l) + { + init(); + } + + regex_constants::syntax_type syntax_type(char c)const + { + return m_char_map[static_cast(c)]; + } + regex_constants::escape_syntax_type escape_syntax_type(char c) const + { + return m_char_map[static_cast(c)]; + } + +private: + regex_constants::syntax_type m_char_map[1u << CHAR_BIT]; + void init(); +}; + +#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET +enum +{ + char_class_space=1<<0, + char_class_print=1<<1, + char_class_cntrl=1<<2, + char_class_upper=1<<3, + char_class_lower=1<<4, + char_class_alpha=1<<5, + char_class_digit=1<<6, + char_class_punct=1<<7, + char_class_xdigit=1<<8, + char_class_alnum=char_class_alpha|char_class_digit, + char_class_graph=char_class_alnum|char_class_punct, + char_class_blank=1<<9, + char_class_word=1<<10, + char_class_unicode=1<<11, + char_class_horizontal_space=1<<12, + char_class_vertical_space=1<<13 +}; + +#endif + +// +// class cpp_regex_traits_implementation: +// provides pimpl implementation for cpp_regex_traits. +// +template +class cpp_regex_traits_implementation : public cpp_regex_traits_char_layer +{ +public: + typedef typename cpp_regex_traits::char_class_type char_class_type; + typedef typename std::ctype::mask native_mask_type; +#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET + BOOST_STATIC_CONSTANT(char_class_type, mask_blank = 1u << 24); + BOOST_STATIC_CONSTANT(char_class_type, mask_word = 1u << 25); + BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 1u << 26); + BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 1u << 27); + BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 1u << 28); +#endif + + typedef std::basic_string string_type; + typedef charT char_type; + //cpp_regex_traits_implementation(); + cpp_regex_traits_implementation(const std::locale& l) + : cpp_regex_traits_char_layer(l) + { + init(); + } + cpp_regex_traits_implementation(const cpp_regex_traits_base& l) + : cpp_regex_traits_char_layer(l) + { + init(); + } + std::string error_string(regex_constants::error_type n) const + { + if(!m_error_strings.empty()) + { + std::map::const_iterator p = m_error_strings.find(n); + return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second; + } + return get_default_error_string(n); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + char_class_type result = lookup_classname_imp(p1, p2); + if(result == 0) + { + string_type temp(p1, p2); + this->m_pctype->tolower(&*temp.begin(), &*temp.begin() + temp.size()); + result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size()); + } + return result; + } + string_type lookup_collatename(const charT* p1, const charT* p2) const; + string_type transform_primary(const charT* p1, const charT* p2) const; + string_type transform(const charT* p1, const charT* p2) const; +private: + std::map m_error_strings; // error messages indexed by numberic ID + std::map m_custom_class_names; // character class names + std::map m_custom_collate_names; // collating element names + unsigned m_collate_type; // the form of the collation string + charT m_collate_delim; // the collation group delimiter + // + // helpers: + // + char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const; + void init(); +#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET +public: + bool isctype(charT c, char_class_type m)const; +#endif +}; + +#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET +#if !defined(BOOST_NO_INCLASS_MEMBER_INITIALIZATION) + +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_blank; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_word; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_unicode; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_vertical; +template +typename cpp_regex_traits_implementation::char_class_type const cpp_regex_traits_implementation::mask_horizontal; + +#endif +#endif + +template +typename cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::transform_primary(const charT* p1, const charT* p2) const +{ + // + // PRECONDITIONS: + // + // A bug in gcc 3.2 (and maybe other versions as well) treats + // p1 as a null terminated string, for efficiency reasons + // we work around this elsewhere, but just assert here that + // we adhere to gcc's (buggy) preconditions... + // + BOOST_ASSERT(*p2 == 0); + + string_type result; + // + // swallowing all exceptions here is a bad idea + // however at least one std lib will always throw + // std::bad_alloc for certain arguments... + // + try{ + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(m_collate_type) + { + case sort_C: + case sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + this->m_pctype->tolower(&*result.begin(), &*result.begin() + result.size()); + result = this->m_pcollate->transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case sort_fixed: + { + // get a regular sort key, and then truncate it: + result.assign(this->m_pcollate->transform(p1, p2)); + result.erase(this->m_collate_delim); + break; + } + case sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result.assign(this->m_pcollate->transform(p1, p2)); + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == m_collate_delim) + break; + } + result.erase(i); + break; + } + }catch(...){} + while(result.size() && (charT(0) == *result.rbegin())) + result.erase(result.size() - 1); + if(result.empty()) + { + // character is ignorable at the primary level: + result = string_type(1, charT(0)); + } + return result; +} + +template +typename cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::transform(const charT* p1, const charT* p2) const +{ + // + // PRECONDITIONS: + // + // A bug in gcc 3.2 (and maybe other versions as well) treats + // p1 as a null terminated string, for efficiency reasons + // we work around this elsewhere, but just assert here that + // we adhere to gcc's (buggy) preconditions... + // + BOOST_ASSERT(*p2 == 0); + // + // swallowing all exceptions here is a bad idea + // however at least one std lib will always throw + // std::bad_alloc for certain arguments... + // + string_type result; + try{ + result = this->m_pcollate->transform(p1, p2); + // + // Borland's STLPort version returns a NULL-terminated + // string that has garbage at the end - each call to + // std::collate::transform returns a different string! + // So as a workaround, we'll truncate the string at the first NULL + // which _seems_ to work.... +#if BOOST_WORKAROUND(__BORLANDC__, < 0x580) + result.erase(result.find(charT(0))); +#else + // + // some implementations (Dinkumware) append unnecessary trailing \0's: + while(result.size() && (charT(0) == *result.rbegin())) + result.erase(result.size() - 1); +#endif + BOOST_ASSERT(std::find(result.begin(), result.end(), charT(0)) == result.end()); + } + catch(...) + { + } + return result; +} + + +template +typename cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::lookup_collatename(const charT* p1, const charT* p2) const +{ + typedef typename std::map::const_iterator iter_type; + if(m_custom_collate_names.size()) + { + iter_type pos = m_custom_collate_names.find(string_type(p1, p2)); + if(pos != m_custom_collate_names.end()) + return pos->second; + } +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) + std::string name(p1, p2); +#else + std::string name; + const charT* p0 = p1; + while(p0 != p2) + name.append(1, char(*p0++)); +#endif + name = lookup_default_collate_name(name); +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) + if(name.size()) + return string_type(name.begin(), name.end()); +#else + if(name.size()) + { + string_type result; + typedef std::string::const_iterator iter; + iter b = name.begin(); + iter e = name.end(); + while(b != e) + result.append(1, charT(*b++)); + return result; + } +#endif + if(p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +template +void cpp_regex_traits_implementation::init() +{ +#ifndef BOOST_NO_STD_MESSAGES +#ifndef __IBMCPP__ + typename std::messages::catalog cat = static_cast::catalog>(-1); +#else + typename std::messages::catalog cat = reinterpret_cast::catalog>(-1); +#endif + std::string cat_name(cpp_regex_traits::get_catalog_name()); + if(cat_name.size() && (this->m_pmessages != 0)) + { + cat = this->m_pmessages->open( + cat_name, + this->m_locale); + if((int)cat < 0) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + cutl_details_boost::re_detail::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if((int)cat >= 0) + { + // + // Error messages: + // + for(cutl_details_boost::regex_constants::error_type i = static_cast(0); + i <= cutl_details_boost::regex_constants::error_unknown; + i = static_cast(i + 1)) + { + const char* p = get_default_error_string(i); + string_type default_message; + while(*p) + { + default_message.append(1, this->m_pctype->widen(*p)); + ++p; + } + string_type s = this->m_pmessages->get(cat, 0, i+200, default_message); + std::string result; + for(std::string::size_type j = 0; j < s.size(); ++j) + { + result.append(1, this->m_pctype->narrow(s[j], 0)); + } + m_error_strings[i] = result; + } + // + // Custom class names: + // +#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET + static const char_class_type masks[16] = + { + std::ctype::alnum, + std::ctype::alpha, + std::ctype::cntrl, + std::ctype::digit, + std::ctype::graph, + cpp_regex_traits_implementation::mask_horizontal, + std::ctype::lower, + std::ctype::print, + std::ctype::punct, + std::ctype::space, + std::ctype::upper, + cpp_regex_traits_implementation::mask_vertical, + std::ctype::xdigit, + cpp_regex_traits_implementation::mask_blank, + cpp_regex_traits_implementation::mask_word, + cpp_regex_traits_implementation::mask_unicode, + }; +#else + static const char_class_type masks[16] = + { + ::cutl_details_boost::re_detail::char_class_alnum, + ::cutl_details_boost::re_detail::char_class_alpha, + ::cutl_details_boost::re_detail::char_class_cntrl, + ::cutl_details_boost::re_detail::char_class_digit, + ::cutl_details_boost::re_detail::char_class_graph, + ::cutl_details_boost::re_detail::char_class_horizontal_space, + ::cutl_details_boost::re_detail::char_class_lower, + ::cutl_details_boost::re_detail::char_class_print, + ::cutl_details_boost::re_detail::char_class_punct, + ::cutl_details_boost::re_detail::char_class_space, + ::cutl_details_boost::re_detail::char_class_upper, + ::cutl_details_boost::re_detail::char_class_vertical_space, + ::cutl_details_boost::re_detail::char_class_xdigit, + ::cutl_details_boost::re_detail::char_class_blank, + ::cutl_details_boost::re_detail::char_class_word, + ::cutl_details_boost::re_detail::char_class_unicode, + }; +#endif + static const string_type null_string; + for(unsigned int j = 0; j <= 13; ++j) + { + string_type s(this->m_pmessages->get(cat, 0, j+300, null_string)); + if(s.size()) + this->m_custom_class_names[s] = masks[j]; + } + } +#endif + // + // get the collation format used by m_pcollate: + // + m_collate_type = re_detail::find_sort_syntax(this, &m_collate_delim); +} + +template +typename cpp_regex_traits_implementation::char_class_type + cpp_regex_traits_implementation::lookup_classname_imp(const charT* p1, const charT* p2) const +{ +#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET + static const char_class_type masks[22] = + { + 0, + std::ctype::alnum, + std::ctype::alpha, + cpp_regex_traits_implementation::mask_blank, + std::ctype::cntrl, + std::ctype::digit, + std::ctype::digit, + std::ctype::graph, + cpp_regex_traits_implementation::mask_horizontal, + std::ctype::lower, + std::ctype::lower, + std::ctype::print, + std::ctype::punct, + std::ctype::space, + std::ctype::space, + std::ctype::upper, + cpp_regex_traits_implementation::mask_unicode, + std::ctype::upper, + cpp_regex_traits_implementation::mask_vertical, + std::ctype::alnum | cpp_regex_traits_implementation::mask_word, + std::ctype::alnum | cpp_regex_traits_implementation::mask_word, + std::ctype::xdigit, + }; +#else + static const char_class_type masks[22] = + { + 0, + ::cutl_details_boost::re_detail::char_class_alnum, + ::cutl_details_boost::re_detail::char_class_alpha, + ::cutl_details_boost::re_detail::char_class_blank, + ::cutl_details_boost::re_detail::char_class_cntrl, + ::cutl_details_boost::re_detail::char_class_digit, + ::cutl_details_boost::re_detail::char_class_digit, + ::cutl_details_boost::re_detail::char_class_graph, + ::cutl_details_boost::re_detail::char_class_horizontal_space, + ::cutl_details_boost::re_detail::char_class_lower, + ::cutl_details_boost::re_detail::char_class_lower, + ::cutl_details_boost::re_detail::char_class_print, + ::cutl_details_boost::re_detail::char_class_punct, + ::cutl_details_boost::re_detail::char_class_space, + ::cutl_details_boost::re_detail::char_class_space, + ::cutl_details_boost::re_detail::char_class_upper, + ::cutl_details_boost::re_detail::char_class_unicode, + ::cutl_details_boost::re_detail::char_class_upper, + ::cutl_details_boost::re_detail::char_class_vertical_space, + ::cutl_details_boost::re_detail::char_class_alnum | ::cutl_details_boost::re_detail::char_class_word, + ::cutl_details_boost::re_detail::char_class_alnum | ::cutl_details_boost::re_detail::char_class_word, + ::cutl_details_boost::re_detail::char_class_xdigit, + }; +#endif + if(m_custom_class_names.size()) + { + typedef typename std::map, char_class_type>::const_iterator map_iter; + map_iter pos = m_custom_class_names.find(string_type(p1, p2)); + if(pos != m_custom_class_names.end()) + return pos->second; + } + std::size_t state_id = 1 + re_detail::get_default_class_id(p1, p2); + BOOST_ASSERT(state_id < sizeof(masks) / sizeof(masks[0])); + return masks[state_id]; +} + +#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET +template +bool cpp_regex_traits_implementation::isctype(const charT c, char_class_type mask) const +{ + return + ((mask & ::cutl_details_boost::re_detail::char_class_space) && (this->m_pctype->is(std::ctype::space, c))) + || ((mask & ::cutl_details_boost::re_detail::char_class_print) && (this->m_pctype->is(std::ctype::print, c))) + || ((mask & ::cutl_details_boost::re_detail::char_class_cntrl) && (this->m_pctype->is(std::ctype::cntrl, c))) + || ((mask & ::cutl_details_boost::re_detail::char_class_upper) && (this->m_pctype->is(std::ctype::upper, c))) + || ((mask & ::cutl_details_boost::re_detail::char_class_lower) && (this->m_pctype->is(std::ctype::lower, c))) + || ((mask & ::cutl_details_boost::re_detail::char_class_alpha) && (this->m_pctype->is(std::ctype::alpha, c))) + || ((mask & ::cutl_details_boost::re_detail::char_class_digit) && (this->m_pctype->is(std::ctype::digit, c))) + || ((mask & ::cutl_details_boost::re_detail::char_class_punct) && (this->m_pctype->is(std::ctype::punct, c))) + || ((mask & ::cutl_details_boost::re_detail::char_class_xdigit) && (this->m_pctype->is(std::ctype::xdigit, c))) + || ((mask & ::cutl_details_boost::re_detail::char_class_blank) && (this->m_pctype->is(std::ctype::space, c)) && !::cutl_details_boost::re_detail::is_separator(c)) + || ((mask & ::cutl_details_boost::re_detail::char_class_word) && (c == '_')) + || ((mask & ::cutl_details_boost::re_detail::char_class_unicode) && ::cutl_details_boost::re_detail::is_extended(c)) + || ((mask & ::cutl_details_boost::re_detail::char_class_vertical_space) && (is_separator(c) || (c == '\v'))) + || ((mask & ::cutl_details_boost::re_detail::char_class_horizontal_space) && this->m_pctype->is(std::ctype::space, c) && !(is_separator(c) || (c == '\v'))); +} +#endif + + +template +inline cutl_details_boost::shared_ptr > create_cpp_regex_traits(const std::locale& l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT)) +{ + cpp_regex_traits_base key(l); + return ::cutl_details_boost::object_cache, cpp_regex_traits_implementation >::get(key, 5); +} + +} // re_detail + +template +class cpp_regex_traits +{ +private: + typedef std::ctype ctype_type; +public: + typedef charT char_type; + typedef std::size_t size_type; + typedef std::basic_string string_type; + typedef std::locale locale_type; + typedef cutl_details_boost::uint_least32_t char_class_type; + + struct boost_extensions_tag{}; + + cpp_regex_traits() + : m_pimpl(re_detail::create_cpp_regex_traits(std::locale())) + { } + static size_type length(const char_type* p) + { + return std::char_traits::length(p); + } + regex_constants::syntax_type syntax_type(charT c)const + { + return m_pimpl->syntax_type(c); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + return m_pimpl->escape_syntax_type(c); + } + charT translate(charT c) const + { + return c; + } + charT translate_nocase(charT c) const + { + return m_pimpl->m_pctype->tolower(c); + } + charT translate(charT c, bool icase) const + { + return icase ? m_pimpl->m_pctype->tolower(c) : c; + } + charT tolower(charT c) const + { + return m_pimpl->m_pctype->tolower(c); + } + charT toupper(charT c) const + { + return m_pimpl->m_pctype->toupper(c); + } + string_type transform(const charT* p1, const charT* p2) const + { + return m_pimpl->transform(p1, p2); + } + string_type transform_primary(const charT* p1, const charT* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_classname(p1, p2); + } + string_type lookup_collatename(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_collatename(p1, p2); + } + bool isctype(charT c, char_class_type f) const + { +#ifndef BOOST_REGEX_BUGGY_CTYPE_FACET + typedef typename std::ctype::mask ctype_mask; + + static const ctype_mask mask_base = + static_cast( + std::ctype::alnum + | std::ctype::alpha + | std::ctype::cntrl + | std::ctype::digit + | std::ctype::graph + | std::ctype::lower + | std::ctype::print + | std::ctype::punct + | std::ctype::space + | std::ctype::upper + | std::ctype::xdigit); + + if((f & mask_base) + && (m_pimpl->m_pctype->is( + static_cast(f & mask_base), c))) + return true; + else if((f & re_detail::cpp_regex_traits_implementation::mask_unicode) && re_detail::is_extended(c)) + return true; + else if((f & re_detail::cpp_regex_traits_implementation::mask_word) && (c == '_')) + return true; + else if((f & re_detail::cpp_regex_traits_implementation::mask_blank) + && m_pimpl->m_pctype->is(std::ctype::space, c) + && !re_detail::is_separator(c)) + return true; + else if((f & re_detail::cpp_regex_traits_implementation::mask_vertical) + && (::cutl_details_boost::re_detail::is_separator(c) || (c == '\v'))) + return true; + else if((f & re_detail::cpp_regex_traits_implementation::mask_horizontal) + && this->isctype(c, std::ctype::space) && !this->isctype(c, re_detail::cpp_regex_traits_implementation::mask_vertical)) + return true; + return false; +#else + return m_pimpl->isctype(c, f); +#endif + } + int toi(const charT*& p1, const charT* p2, int radix)const; + int value(charT c, int radix)const + { + const charT* pc = &c; + return toi(pc, pc + 1, radix); + } + locale_type imbue(locale_type l) + { + std::locale result(getloc()); + m_pimpl = re_detail::create_cpp_regex_traits(l); + return result; + } + locale_type getloc()const + { + return m_pimpl->m_locale; + } + std::string error_string(regex_constants::error_type n) const + { + return m_pimpl->error_string(n); + } + + // + // extension: + // set the name of the message catalog in use (defaults to "boost_regex"). + // + static std::string catalog_name(const std::string& name); + static std::string get_catalog_name(); + +private: + cutl_details_boost::shared_ptr > m_pimpl; + // + // catalog name handler: + // + static std::string& get_catalog_name_inst(); + +#ifdef BOOST_HAS_THREADS + static static_mutex& get_mutex_inst(); +#endif +}; + + +template +int cpp_regex_traits::toi(const charT*& first, const charT* last, int radix)const +{ + re_detail::parser_buf sbuf; // buffer for parsing numbers. + std::basic_istream is(&sbuf); // stream for parsing numbers. + + // we do NOT want to parse any thousands separators inside the stream: + last = std::find(first, last, BOOST_USE_FACET(std::numpunct, is.getloc()).thousands_sep()); + + sbuf.pubsetbuf(const_cast(static_cast(first)), static_cast(last-first)); + is.clear(); + if(std::abs(radix) == 16) is >> std::hex; + else if(std::abs(radix) == 8) is >> std::oct; + else is >> std::dec; + int val; + if(is >> val) + { + first = first + ((last - first) - sbuf.in_avail()); + return val; + } + else + return -1; +} + +template +std::string cpp_regex_traits::catalog_name(const std::string& name) +{ +#ifdef BOOST_HAS_THREADS + static_mutex::scoped_lock lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + get_catalog_name_inst() = name; + return result; +} + +template +std::string& cpp_regex_traits::get_catalog_name_inst() +{ + static std::string s_name; + return s_name; +} + +template +std::string cpp_regex_traits::get_catalog_name() +{ +#ifdef BOOST_HAS_THREADS + static_mutex::scoped_lock lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + return result; +} + +#ifdef BOOST_HAS_THREADS +template +static_mutex& cpp_regex_traits::get_mutex_inst() +{ + static static_mutex s_mutex = BOOST_STATIC_MUTEX_INIT; + return s_mutex; +} +#endif + + +} // boost + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + +#endif + + diff --git a/cutl/details/boost/regex/v4/cregex.hpp b/cutl/details/boost/regex/v4/cregex.hpp new file mode 100644 index 0000000..0930a84 --- /dev/null +++ b/cutl/details/boost/regex/v4/cregex.hpp @@ -0,0 +1,330 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE cregex.cpp + * VERSION see + * DESCRIPTION: Declares POSIX API functions + * + cutl_details_boost::RegEx high level wrapper. + */ + +#ifndef BOOST_RE_CREGEX_HPP_INCLUDED +#define BOOST_RE_CREGEX_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif +#include +#include + +#ifdef __cplusplus +#include +#else +#include +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +/* include these defs only for POSIX compatablity */ +#ifdef __cplusplus +namespace cutl_details_boost{ +extern "C" { +#endif + +#if defined(__cplusplus) && !defined(BOOST_NO_STDC_NAMESPACE) +typedef std::ptrdiff_t regoff_t; +typedef std::size_t regsize_t; +#else +typedef ptrdiff_t regoff_t; +typedef size_t regsize_t; +#endif + +typedef struct +{ + unsigned int re_magic; +#ifdef __cplusplus + std::size_t re_nsub; /* number of parenthesized subexpressions */ +#else + size_t re_nsub; +#endif + const char* re_endp; /* end pointer for REG_PEND */ + void* guts; /* none of your business :-) */ + match_flag_type eflags; /* none of your business :-) */ +} regex_tA; + +#ifndef BOOST_NO_WREGEX +typedef struct +{ + unsigned int re_magic; +#ifdef __cplusplus + std::size_t re_nsub; /* number of parenthesized subexpressions */ +#else + size_t re_nsub; +#endif + const wchar_t* re_endp; /* end pointer for REG_PEND */ + void* guts; /* none of your business :-) */ + match_flag_type eflags; /* none of your business :-) */ +} regex_tW; +#endif + +typedef struct +{ + regoff_t rm_so; /* start of match */ + regoff_t rm_eo; /* end of match */ +} regmatch_t; + +/* regcomp() flags */ +typedef enum{ + REG_BASIC = 0000, + REG_EXTENDED = 0001, + REG_ICASE = 0002, + REG_NOSUB = 0004, + REG_NEWLINE = 0010, + REG_NOSPEC = 0020, + REG_PEND = 0040, + REG_DUMP = 0200, + REG_NOCOLLATE = 0400, + REG_ESCAPE_IN_LISTS = 01000, + REG_NEWLINE_ALT = 02000, + REG_PERLEX = 04000, + + REG_PERL = REG_EXTENDED | REG_NOCOLLATE | REG_ESCAPE_IN_LISTS | REG_PERLEX, + REG_AWK = REG_EXTENDED | REG_ESCAPE_IN_LISTS, + REG_GREP = REG_BASIC | REG_NEWLINE_ALT, + REG_EGREP = REG_EXTENDED | REG_NEWLINE_ALT, + + REG_ASSERT = 15, + REG_INVARG = 16, + REG_ATOI = 255, /* convert name to number (!) */ + REG_ITOA = 0400 /* convert number to name (!) */ +} reg_comp_flags; + +/* regexec() flags */ +typedef enum{ + REG_NOTBOL = 00001, + REG_NOTEOL = 00002, + REG_STARTEND = 00004 +} reg_exec_flags; + +/* + * POSIX error codes: + */ +typedef unsigned reg_error_t; +typedef reg_error_t reg_errcode_t; /* backwards compatibility */ + +static const reg_error_t REG_NOERROR = 0; /* Success. */ +static const reg_error_t REG_NOMATCH = 1; /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ +static const reg_error_t REG_BADPAT = 2; /* Invalid pattern. */ +static const reg_error_t REG_ECOLLATE = 3; /* Undefined collating element. */ +static const reg_error_t REG_ECTYPE = 4; /* Invalid character class name. */ +static const reg_error_t REG_EESCAPE = 5; /* Trailing backslash. */ +static const reg_error_t REG_ESUBREG = 6; /* Invalid back reference. */ +static const reg_error_t REG_EBRACK = 7; /* Unmatched left bracket. */ +static const reg_error_t REG_EPAREN = 8; /* Parenthesis imbalance. */ +static const reg_error_t REG_EBRACE = 9; /* Unmatched \{. */ +static const reg_error_t REG_BADBR = 10; /* Invalid contents of \{\}. */ +static const reg_error_t REG_ERANGE = 11; /* Invalid range end. */ +static const reg_error_t REG_ESPACE = 12; /* Ran out of memory. */ +static const reg_error_t REG_BADRPT = 13; /* No preceding re for repetition op. */ +static const reg_error_t REG_EEND = 14; /* unexpected end of expression */ +static const reg_error_t REG_ESIZE = 15; /* expression too big */ +static const reg_error_t REG_ERPAREN = 8; /* = REG_EPAREN : unmatched right parenthesis */ +static const reg_error_t REG_EMPTY = 17; /* empty expression */ +static const reg_error_t REG_E_MEMORY = 15; /* = REG_ESIZE : out of memory */ +static const reg_error_t REG_ECOMPLEXITY = 18; /* complexity too high */ +static const reg_error_t REG_ESTACK = 19; /* out of stack space */ +static const reg_error_t REG_E_PERL = 20; /* Perl (?...) error */ +static const reg_error_t REG_E_UNKNOWN = 21; /* unknown error */ +static const reg_error_t REG_ENOSYS = 21; /* = REG_E_UNKNOWN : Reserved. */ + +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompA(regex_tA*, const char*, int); +BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorA(int, const regex_tA*, char*, regsize_t); +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecA(const regex_tA*, const char*, regsize_t, regmatch_t*, int); +BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeA(regex_tA*); + +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regcompW(regex_tW*, const wchar_t*, int); +BOOST_REGEX_DECL regsize_t BOOST_REGEX_CCALL regerrorW(int, const regex_tW*, wchar_t*, regsize_t); +BOOST_REGEX_DECL int BOOST_REGEX_CCALL regexecW(const regex_tW*, const wchar_t*, regsize_t, regmatch_t*, int); +BOOST_REGEX_DECL void BOOST_REGEX_CCALL regfreeW(regex_tW*); +#endif + +#ifdef UNICODE +#define regcomp regcompW +#define regerror regerrorW +#define regexec regexecW +#define regfree regfreeW +#define regex_t regex_tW +#else +#define regcomp regcompA +#define regerror regerrorA +#define regexec regexecA +#define regfree regfreeA +#define regex_t regex_tA +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef __cplusplus +} /* extern "C" */ +} /* namespace */ +#endif + +#if defined(__cplusplus) +/* + * C++ high level wrapper goes here: + */ +#include +#include +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +class RegEx; + +namespace re_detail{ + +class RegExData; +struct pred1; +struct pred2; +struct pred3; +struct pred4; + +} /* namespace re_detail */ + +#if (defined(BOOST_MSVC) || defined(__BORLANDC__)) && !defined(BOOST_DISABLE_WIN32) +typedef bool (__cdecl *GrepCallback)(const RegEx& expression); +typedef bool (__cdecl *GrepFileCallback)(const char* file, const RegEx& expression); +typedef bool (__cdecl *FindFilesCallback)(const char* file); +#else +typedef bool (*GrepCallback)(const RegEx& expression); +typedef bool (*GrepFileCallback)(const char* file, const RegEx& expression); +typedef bool (*FindFilesCallback)(const char* file); +#endif + +class BOOST_REGEX_DECL RegEx +{ +private: + re_detail::RegExData* pdata; +public: + RegEx(); + RegEx(const RegEx& o); + ~RegEx(); + explicit RegEx(const char* c, bool icase = false); + explicit RegEx(const std::string& s, bool icase = false); + RegEx& operator=(const RegEx& o); + RegEx& operator=(const char* p); + RegEx& operator=(const std::string& s){ return this->operator=(s.c_str()); } + unsigned int SetExpression(const char* p, bool icase = false); + unsigned int SetExpression(const std::string& s, bool icase = false){ return SetExpression(s.c_str(), icase); } + std::string Expression()const; + unsigned int error_code()const; + /* + * now matching operators: + */ + bool Match(const char* p, match_flag_type flags = match_default); + bool Match(const std::string& s, match_flag_type flags = match_default) { return Match(s.c_str(), flags); } + bool Search(const char* p, match_flag_type flags = match_default); + bool Search(const std::string& s, match_flag_type flags = match_default) { return Search(s.c_str(), flags); } + unsigned int Grep(GrepCallback cb, const char* p, match_flag_type flags = match_default); + unsigned int Grep(GrepCallback cb, const std::string& s, match_flag_type flags = match_default) { return Grep(cb, s.c_str(), flags); } + unsigned int Grep(std::vector& v, const char* p, match_flag_type flags = match_default); + unsigned int Grep(std::vector& v, const std::string& s, match_flag_type flags = match_default) { return Grep(v, s.c_str(), flags); } + unsigned int Grep(std::vector& v, const char* p, match_flag_type flags = match_default); + unsigned int Grep(std::vector& v, const std::string& s, match_flag_type flags = match_default) { return Grep(v, s.c_str(), flags); } +#ifndef BOOST_REGEX_NO_FILEITER + unsigned int GrepFiles(GrepFileCallback cb, const char* files, bool recurse = false, match_flag_type flags = match_default); + unsigned int GrepFiles(GrepFileCallback cb, const std::string& files, bool recurse = false, match_flag_type flags = match_default) { return GrepFiles(cb, files.c_str(), recurse, flags); } + unsigned int FindFiles(FindFilesCallback cb, const char* files, bool recurse = false, match_flag_type flags = match_default); + unsigned int FindFiles(FindFilesCallback cb, const std::string& files, bool recurse = false, match_flag_type flags = match_default) { return FindFiles(cb, files.c_str(), recurse, flags); } +#endif + + std::string Merge(const std::string& in, const std::string& fmt, + bool copy = true, match_flag_type flags = match_default); + std::string Merge(const char* in, const char* fmt, + bool copy = true, match_flag_type flags = match_default); + + std::size_t Split(std::vector& v, std::string& s, match_flag_type flags = match_default, unsigned max_count = ~0); + /* + * now operators for returning what matched in more detail: + */ + std::size_t Position(int i = 0)const; + std::size_t Length(int i = 0)const; + bool Matched(int i = 0)const; + std::size_t Marks()const; + std::string What(int i = 0)const; + std::string operator[](int i)const { return What(i); } + + static const std::size_t npos; + + friend struct re_detail::pred1; + friend struct re_detail::pred2; + friend struct re_detail::pred3; + friend struct re_detail::pred4; +}; + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} /* namespace cutl_details_boost */ + +#endif /* __cplusplus */ + +#endif /* include guard */ + + + + + + + + + + diff --git a/cutl/details/boost/regex/v4/error_type.hpp b/cutl/details/boost/regex/v4/error_type.hpp new file mode 100644 index 0000000..db00e46 --- /dev/null +++ b/cutl/details/boost/regex/v4/error_type.hpp @@ -0,0 +1,59 @@ +/* + * + * Copyright (c) 2003-2005 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE error_type.hpp + * VERSION see + * DESCRIPTION: Declares regular expression error type enumerator. + */ + +#ifndef BOOST_REGEX_ERROR_TYPE_HPP +#define BOOST_REGEX_ERROR_TYPE_HPP + +#ifdef __cplusplus +namespace cutl_details_boost{ +#endif + +#ifdef __cplusplus +namespace regex_constants{ + +enum error_type{ + + error_ok = 0, /* not used */ + error_no_match = 1, /* not used */ + error_bad_pattern = 2, + error_collate = 3, + error_ctype = 4, + error_escape = 5, + error_backref = 6, + error_brack = 7, + error_paren = 8, + error_brace = 9, + error_badbrace = 10, + error_range = 11, + error_space = 12, + error_badrepeat = 13, + error_end = 14, /* not used */ + error_size = 15, + error_right_paren = 16, /* not used */ + error_empty = 17, + error_complexity = 18, + error_stack = 19, + error_perl_extension = 20, + error_unknown = 21 +}; + +} +} +#endif /* __cplusplus */ + +#endif diff --git a/cutl/details/boost/regex/v4/fileiter.hpp b/cutl/details/boost/regex/v4/fileiter.hpp new file mode 100644 index 0000000..8474f0a --- /dev/null +++ b/cutl/details/boost/regex/v4/fileiter.hpp @@ -0,0 +1,455 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE fileiter.hpp + * VERSION see + * DESCRIPTION: Declares various platform independent file and + * directory iterators, plus binary file input in + * the form of class map_file. + */ + +#ifndef BOOST_RE_FILEITER_HPP_INCLUDED +#define BOOST_RE_FILEITER_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif +#include + +#ifndef BOOST_REGEX_NO_FILEITER + +#if (defined(__CYGWIN__) || defined(__CYGWIN32__)) && !defined(BOOST_REGEX_NO_W32) +#error "Sorry, can't mix with STL code and gcc compiler: if you ran configure, try again with configure --disable-ms-windows" +#define BOOST_REGEX_FI_WIN32_MAP +#define BOOST_REGEX_FI_POSIX_DIR +#elif (defined(__WIN32__) || defined(_WIN32) || defined(WIN32)) && !defined(BOOST_REGEX_NO_W32) +#define BOOST_REGEX_FI_WIN32_MAP +#define BOOST_REGEX_FI_WIN32_DIR +#else +#define BOOST_REGEX_FI_POSIX_MAP +#define BOOST_REGEX_FI_POSIX_DIR +#endif + +#if defined(BOOST_REGEX_FI_WIN32_MAP)||defined(BOOST_REGEX_FI_WIN32_DIR) +#include +#endif + +#if defined(BOOST_REGEX_FI_WIN32_DIR) + +#include + +namespace cutl_details_boost{ + namespace re_detail{ + +#ifndef BOOST_NO_ANSI_APIS +typedef WIN32_FIND_DATAA _fi_find_data; +#else +typedef WIN32_FIND_DATAW _fi_find_data; +#endif +typedef HANDLE _fi_find_handle; + + } // namespace re_detail + +} // namespace cutl_details_boost + +#define _fi_invalid_handle INVALID_HANDLE_VALUE +#define _fi_dir FILE_ATTRIBUTE_DIRECTORY + +#elif defined(BOOST_REGEX_FI_POSIX_DIR) + +#include +#include +#include +#include +#include +#include +#include + +#if defined(__SUNPRO_CC) +using std::list; +#endif + +#ifndef MAX_PATH +#define MAX_PATH 256 +#endif + +namespace cutl_details_boost{ + namespace re_detail{ + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +struct _fi_find_data +{ + unsigned dwFileAttributes; + char cFileName[MAX_PATH]; +}; + +struct _fi_priv_data; + +typedef _fi_priv_data* _fi_find_handle; +#define _fi_invalid_handle 0 +#define _fi_dir 1 + +_fi_find_handle _fi_FindFirstFile(const char* lpFileName, _fi_find_data* lpFindFileData); +bool _fi_FindNextFile(_fi_find_handle hFindFile, _fi_find_data* lpFindFileData); +bool _fi_FindClose(_fi_find_handle hFindFile); + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + + } // namespace re_detail +} // namespace cutl_details_boost + +#ifdef FindFirstFile + #undef FindFirstFile +#endif +#ifdef FindNextFile + #undef FindNextFile +#endif +#ifdef FindClose + #undef FindClose +#endif + +#define FindFirstFileA _fi_FindFirstFile +#define FindNextFileA _fi_FindNextFile +#define FindClose _fi_FindClose + +#endif + +namespace cutl_details_boost{ + namespace re_detail{ + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +#ifdef BOOST_REGEX_FI_WIN32_MAP // win32 mapfile + +class BOOST_REGEX_DECL mapfile +{ + HANDLE hfile; + HANDLE hmap; + const char* _first; + const char* _last; +public: + + typedef const char* iterator; + + mapfile(){ hfile = hmap = 0; _first = _last = 0; } + mapfile(const char* file){ hfile = hmap = 0; _first = _last = 0; open(file); } + ~mapfile(){ close(); } + void open(const char* file); + void close(); + const char* begin(){ return _first; } + const char* end(){ return _last; } + size_t size(){ return _last - _first; } + bool valid(){ return (hfile != 0) && (hfile != INVALID_HANDLE_VALUE); } +}; + + +#else + +class BOOST_REGEX_DECL mapfile_iterator; + +class BOOST_REGEX_DECL mapfile +{ + typedef char* pointer; + std::FILE* hfile; + long int _size; + pointer* _first; + pointer* _last; + mutable std::list condemed; + enum sizes + { + buf_size = 4096 + }; + void lock(pointer* node)const; + void unlock(pointer* node)const; +public: + + typedef mapfile_iterator iterator; + + mapfile(){ hfile = 0; _size = 0; _first = _last = 0; } + mapfile(const char* file){ hfile = 0; _size = 0; _first = _last = 0; open(file); } + ~mapfile(){ close(); } + void open(const char* file); + void close(); + iterator begin()const; + iterator end()const; + unsigned long size()const{ return _size; } + bool valid()const{ return hfile != 0; } + friend class mapfile_iterator; +}; + +class BOOST_REGEX_DECL mapfile_iterator +#if !defined(BOOST_NO_STD_ITERATOR) || defined(BOOST_MSVC_STD_ITERATOR) +: public std::iterator +#endif +{ + typedef mapfile::pointer internal_pointer; + internal_pointer* node; + const mapfile* file; + unsigned long offset; + long position()const + { + return file ? ((node - file->_first) * mapfile::buf_size + offset) : 0; + } + void position(long pos) + { + if(file) + { + node = file->_first + (pos / mapfile::buf_size); + offset = pos % mapfile::buf_size; + } + } +public: + typedef std::ptrdiff_t difference_type; + typedef char value_type; + typedef const char* pointer; + typedef const char& reference; + typedef std::random_access_iterator_tag iterator_category; + + mapfile_iterator() { node = 0; file = 0; offset = 0; } + mapfile_iterator(const mapfile* f, long arg_position) + { + file = f; + node = f->_first + arg_position / mapfile::buf_size; + offset = arg_position % mapfile::buf_size; + if(file) + file->lock(node); + } + mapfile_iterator(const mapfile_iterator& i) + { + file = i.file; + node = i.node; + offset = i.offset; + if(file) + file->lock(node); + } + ~mapfile_iterator() + { + if(file && node) + file->unlock(node); + } + mapfile_iterator& operator = (const mapfile_iterator& i); + char operator* ()const + { + BOOST_ASSERT(node >= file->_first); + BOOST_ASSERT(node < file->_last); + return file ? *(*node + sizeof(int) + offset) : char(0); + } + char operator[] (long off)const + { + mapfile_iterator tmp(*this); + tmp += off; + return *tmp; + } + mapfile_iterator& operator++ (); + mapfile_iterator operator++ (int); + mapfile_iterator& operator-- (); + mapfile_iterator operator-- (int); + + mapfile_iterator& operator += (long off) + { + position(position() + off); + return *this; + } + mapfile_iterator& operator -= (long off) + { + position(position() - off); + return *this; + } + + friend inline bool operator==(const mapfile_iterator& i, const mapfile_iterator& j) + { + return (i.file == j.file) && (i.node == j.node) && (i.offset == j.offset); + } + + friend inline bool operator!=(const mapfile_iterator& i, const mapfile_iterator& j) + { + return !(i == j); + } + + friend inline bool operator<(const mapfile_iterator& i, const mapfile_iterator& j) + { + return i.position() < j.position(); + } + friend inline bool operator>(const mapfile_iterator& i, const mapfile_iterator& j) + { + return i.position() > j.position(); + } + friend inline bool operator<=(const mapfile_iterator& i, const mapfile_iterator& j) + { + return i.position() <= j.position(); + } + friend inline bool operator>=(const mapfile_iterator& i, const mapfile_iterator& j) + { + return i.position() >= j.position(); + } + + friend mapfile_iterator operator + (const mapfile_iterator& i, long off); + friend mapfile_iterator operator + (long off, const mapfile_iterator& i) + { + mapfile_iterator tmp(i); + return tmp += off; + } + friend mapfile_iterator operator - (const mapfile_iterator& i, long off); + friend inline long operator - (const mapfile_iterator& i, const mapfile_iterator& j) + { + return i.position() - j.position(); + } +}; + +#endif + +// _fi_sep determines the directory separator, either '\\' or '/' +BOOST_REGEX_DECL extern const char* _fi_sep; + +struct file_iterator_ref +{ + _fi_find_handle hf; + _fi_find_data _data; + long count; +}; + + +class BOOST_REGEX_DECL file_iterator +{ + char* _root; + char* _path; + char* ptr; + file_iterator_ref* ref; + +public: + typedef std::ptrdiff_t difference_type; + typedef const char* value_type; + typedef const char** pointer; + typedef const char*& reference; + typedef std::input_iterator_tag iterator_category; + + file_iterator(); + file_iterator(const char* wild); + ~file_iterator(); + file_iterator(const file_iterator&); + file_iterator& operator=(const file_iterator&); + const char* root()const { return _root; } + const char* path()const { return _path; } + const char* name()const { return ptr; } + _fi_find_data* data() { return &(ref->_data); } + void next(); + file_iterator& operator++() { next(); return *this; } + file_iterator operator++(int); + const char* operator*() { return path(); } + + friend inline bool operator == (const file_iterator& f1, const file_iterator& f2) + { + return ((f1.ref->hf == _fi_invalid_handle) && (f2.ref->hf == _fi_invalid_handle)); + } + + friend inline bool operator != (const file_iterator& f1, const file_iterator& f2) + { + return !(f1 == f2); + } + +}; + +// dwa 9/13/00 - suppress unused parameter warning +inline bool operator < (const file_iterator&, const file_iterator&) +{ + return false; +} + + +class BOOST_REGEX_DECL directory_iterator +{ + char* _root; + char* _path; + char* ptr; + file_iterator_ref* ref; + +public: + typedef std::ptrdiff_t difference_type; + typedef const char* value_type; + typedef const char** pointer; + typedef const char*& reference; + typedef std::input_iterator_tag iterator_category; + + directory_iterator(); + directory_iterator(const char* wild); + ~directory_iterator(); + directory_iterator(const directory_iterator& other); + directory_iterator& operator=(const directory_iterator& other); + + const char* root()const { return _root; } + const char* path()const { return _path; } + const char* name()const { return ptr; } + _fi_find_data* data() { return &(ref->_data); } + void next(); + directory_iterator& operator++() { next(); return *this; } + directory_iterator operator++(int); + const char* operator*() { return path(); } + + static const char* separator() { return _fi_sep; } + + friend inline bool operator == (const directory_iterator& f1, const directory_iterator& f2) + { + return ((f1.ref->hf == _fi_invalid_handle) && (f2.ref->hf == _fi_invalid_handle)); + } + + + friend inline bool operator != (const directory_iterator& f1, const directory_iterator& f2) + { + return !(f1 == f2); + } + + }; + +inline bool operator < (const directory_iterator&, const directory_iterator&) +{ + return false; +} + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + + +} // namespace re_detail +using cutl_details_boost::re_detail::directory_iterator; +using cutl_details_boost::re_detail::file_iterator; +using cutl_details_boost::re_detail::mapfile; +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_NO_FILEITER +#endif // BOOST_RE_FILEITER_HPP + + + + + + + + + + + + + + + + + + diff --git a/cutl/details/boost/regex/v4/instances.hpp b/cutl/details/boost/regex/v4/instances.hpp new file mode 100644 index 0000000..0ebf5c9 --- /dev/null +++ b/cutl/details/boost/regex/v4/instances.hpp @@ -0,0 +1,219 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE instances.cpp + * VERSION see + * DESCRIPTION: Defines those template instances that are placed in the + * library rather than in the users object files. + */ + +// +// note no include guard, we may include this multiple times: +// +#ifndef BOOST_REGEX_NO_EXTERNAL_TEMPLATES + +namespace cutl_details_boost{ + +// +// this header can be included multiple times, each time with +// a different character type, BOOST_REGEX_CHAR_T must be defined +// first: +// +#ifndef BOOST_REGEX_CHAR_T +# error "BOOST_REGEX_CHAR_T not defined" +#endif + +#ifndef BOOST_REGEX_TRAITS_T +# define BOOST_REGEX_TRAITS_T , cutl_details_boost::regex_traits +#endif + +// +// what follows is compiler specific: +// + +#if defined(__BORLANDC__) && (__BORLANDC__ < 0x600) + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +# ifndef BOOST_REGEX_INSTANTIATE +# pragma option push -Jgx +# endif + +template class BOOST_REGEX_DECL basic_regex< BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >; +template class BOOST_REGEX_DECL match_results< const BOOST_REGEX_CHAR_T* >; +#ifndef BOOST_NO_STD_ALLOCATOR +template class BOOST_REGEX_DECL ::cutl_details_boost::re_detail::perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >; +#endif + +# ifndef BOOST_REGEX_INSTANTIATE +# pragma option pop +# endif + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + +#elif defined(BOOST_MSVC) || defined(__ICL) + +# ifndef BOOST_REGEX_INSTANTIATE +# ifdef __GNUC__ +# define template __extension__ extern template +# else +# if BOOST_MSVC > 1310 +# define BOOST_REGEX_TEMPLATE_DECL +# endif +# define template extern template +# endif +# endif + +#ifndef BOOST_REGEX_TEMPLATE_DECL +# define BOOST_REGEX_TEMPLATE_DECL BOOST_REGEX_DECL +#endif + +# ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable : 4251 4231 4660) +# endif + +template class BOOST_REGEX_TEMPLATE_DECL basic_regex< BOOST_REGEX_CHAR_T BOOST_REGEX_TRAITS_T >; + +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300) +template class BOOST_REGEX_TEMPLATE_DECL match_results< const BOOST_REGEX_CHAR_T* >; +#endif +#ifndef BOOST_NO_STD_ALLOCATOR +template class BOOST_REGEX_TEMPLATE_DECL ::cutl_details_boost::re_detail::perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >; +#endif +#if !(defined(BOOST_DINKUMWARE_STDLIB) && (BOOST_DINKUMWARE_STDLIB <= 1))\ + && !(defined(BOOST_INTEL_CXX_VERSION) && (BOOST_INTEL_CXX_VERSION <= 800))\ + && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))\ + && !defined(BOOST_REGEX_ICU_INSTANCES) +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300) +template class BOOST_REGEX_TEMPLATE_DECL match_results< std::basic_string::const_iterator >; +#endif +#ifndef BOOST_NO_STD_ALLOCATOR +template class BOOST_REGEX_TEMPLATE_DECL ::cutl_details_boost::re_detail::perl_matcher< std::basic_string::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, cutl_details_boost::regex_traits >; +#endif +#endif + + +# ifdef BOOST_MSVC +# pragma warning(pop) +# endif + +# ifdef template +# undef template +# endif + +#undef BOOST_REGEX_TEMPLATE_DECL + +#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || !defined(BOOST_NO_EXTERN_TEMPLATE) + +# ifndef BOOST_REGEX_INSTANTIATE +# ifdef __GNUC__ +# define template __extension__ extern template +# else +# define template extern template +# endif +# endif + +#if !defined(BOOST_NO_STD_LOCALE) && !defined(BOOST_REGEX_ICU_INSTANCES) +namespace re_detail{ +template BOOST_REGEX_DECL +std::locale cpp_regex_traits_base::imbue(const std::locale& l); + +template BOOST_REGEX_DECL +cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::transform_primary(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; +template BOOST_REGEX_DECL +cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::transform(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; +template BOOST_REGEX_DECL +cpp_regex_traits_implementation::string_type + cpp_regex_traits_implementation::lookup_collatename(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; +template BOOST_REGEX_DECL +void cpp_regex_traits_implementation::init(); +template BOOST_REGEX_DECL +cpp_regex_traits_implementation::char_class_type + cpp_regex_traits_implementation::lookup_classname_imp(const BOOST_REGEX_CHAR_T* p1, const BOOST_REGEX_CHAR_T* p2) const; +#ifdef BOOST_REGEX_BUGGY_CTYPE_FACET +template BOOST_REGEX_DECL +bool cpp_regex_traits_implementation::isctype(const BOOST_REGEX_CHAR_T c, char_class_type mask) const; +#endif +} // namespace +template BOOST_REGEX_DECL +int cpp_regex_traits::toi(const BOOST_REGEX_CHAR_T*& first, const BOOST_REGEX_CHAR_T* last, int radix)const; +template BOOST_REGEX_DECL +std::string cpp_regex_traits::catalog_name(const std::string& name); +template BOOST_REGEX_DECL +std::string& cpp_regex_traits::get_catalog_name_inst(); +template BOOST_REGEX_DECL +std::string cpp_regex_traits::get_catalog_name(); +#ifdef BOOST_HAS_THREADS +template BOOST_REGEX_DECL +static_mutex& cpp_regex_traits::get_mutex_inst(); +#endif +#endif + +template BOOST_REGEX_DECL basic_regex& + basic_regex::do_assign( + const BOOST_REGEX_CHAR_T* p1, + const BOOST_REGEX_CHAR_T* p2, + flag_type f); +template BOOST_REGEX_DECL basic_regex::locale_type BOOST_REGEX_CALL + basic_regex::imbue(locale_type l); + +template BOOST_REGEX_DECL void BOOST_REGEX_CALL + match_results::maybe_assign( + const match_results& m); + +namespace re_detail{ +template BOOST_REGEX_DECL void perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::construct_init( + const basic_regex& e, match_flag_type f); +template BOOST_REGEX_DECL bool perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::match(); +template BOOST_REGEX_DECL bool perl_matcher::allocator_type BOOST_REGEX_TRAITS_T >::find(); +} // namespace + +#if (defined(__GLIBCPP__) || defined(__GLIBCXX__)) \ + && !defined(BOOST_REGEX_ICU_INSTANCES)\ + && !defined(__SGI_STL_PORT)\ + && !defined(_STLPORT_VERSION) +// std:basic_string<>::const_iterator instances as well: +template BOOST_REGEX_DECL void BOOST_REGEX_CALL + match_results::const_iterator>::maybe_assign( + const match_results::const_iterator>& m); + +namespace re_detail{ +template BOOST_REGEX_DECL void perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, cutl_details_boost::regex_traits >::construct_init( + const basic_regex& e, match_flag_type f); +template BOOST_REGEX_DECL bool perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, cutl_details_boost::regex_traits >::match(); +template BOOST_REGEX_DECL bool perl_matcher::const_iterator, match_results< std::basic_string::const_iterator >::allocator_type, cutl_details_boost::regex_traits >::find(); +} // namespace +#endif + +# ifdef template +# undef template +# endif + + +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_NO_EXTERNAL_TEMPLATES + + + + + diff --git a/cutl/details/boost/regex/v4/iterator_category.hpp b/cutl/details/boost/regex/v4/iterator_category.hpp new file mode 100644 index 0000000..18b1291 --- /dev/null +++ b/cutl/details/boost/regex/v4/iterator_category.hpp @@ -0,0 +1,91 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_match.hpp + * VERSION see + * DESCRIPTION: Iterator traits for selecting an iterator type as + * an integral constant expression. + */ + + +#ifndef BOOST_REGEX_ITERATOR_CATEGORY_HPP +#define BOOST_REGEX_ITERATOR_CATEGORY_HPP + +#include +#include +#include + +namespace cutl_details_boost{ +namespace detail{ + +template +struct is_random_imp +{ +#ifndef BOOST_NO_STD_ITERATOR_TRAITS +private: + typedef typename std::iterator_traits::iterator_category cat; +public: + BOOST_STATIC_CONSTANT(bool, value = (::cutl_details_boost::is_convertible::value)); +#else + BOOST_STATIC_CONSTANT(bool, value = false); +#endif +}; + +template +struct is_random_pointer_imp +{ + BOOST_STATIC_CONSTANT(bool, value = true); +}; + +template +struct is_random_imp_selector +{ + template + struct rebind + { + typedef is_random_imp type; + }; +}; + +template <> +struct is_random_imp_selector +{ + template + struct rebind + { + typedef is_random_pointer_imp type; + }; +}; + +} + +template +struct is_random_access_iterator +{ +private: + typedef detail::is_random_imp_selector< ::cutl_details_boost::is_pointer::value> selector; + typedef typename selector::template rebind bound_type; + typedef typename bound_type::type answer; +public: + BOOST_STATIC_CONSTANT(bool, value = answer::value); +}; + +#ifndef BOOST_NO_INCLASS_MEMBER_INITIALIZATION +template +const bool is_random_access_iterator::value; +#endif + +} + +#endif + diff --git a/cutl/details/boost/regex/v4/iterator_traits.hpp b/cutl/details/boost/regex/v4/iterator_traits.hpp new file mode 100644 index 0000000..146db8a --- /dev/null +++ b/cutl/details/boost/regex/v4/iterator_traits.hpp @@ -0,0 +1,135 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE iterator_traits.cpp + * VERSION see + * DESCRIPTION: Declares iterator traits workarounds. + */ + +#ifndef BOOST_REGEX_V4_ITERATOR_TRAITS_HPP +#define BOOST_REGEX_V4_ITERATOR_TRAITS_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +#if defined(BOOST_NO_STD_ITERATOR_TRAITS) || defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + +template +struct regex_iterator_traits +{ + typedef typename T::iterator_category iterator_category; + typedef typename T::value_type value_type; +#if !defined(BOOST_NO_STD_ITERATOR) + typedef typename T::difference_type difference_type; + typedef typename T::pointer pointer; + typedef typename T::reference reference; +#else + typedef std::ptrdiff_t difference_type; + typedef value_type* pointer; + typedef value_type& reference; +#endif +}; + +template +struct pointer_iterator_traits +{ + typedef std::ptrdiff_t difference_type; + typedef T value_type; + typedef T* pointer; + typedef T& reference; + typedef std::random_access_iterator_tag iterator_category; +}; +template +struct const_pointer_iterator_traits +{ + typedef std::ptrdiff_t difference_type; + typedef T value_type; + typedef const T* pointer; + typedef const T& reference; + typedef std::random_access_iterator_tag iterator_category; +}; + +template<> +struct regex_iterator_traits : pointer_iterator_traits{}; +template<> +struct regex_iterator_traits : const_pointer_iterator_traits{}; +template<> +struct regex_iterator_traits : pointer_iterator_traits{}; +template<> +struct regex_iterator_traits : const_pointer_iterator_traits{}; +// +// the follwoing are needed for ICU support: +// +template<> +struct regex_iterator_traits : pointer_iterator_traits{}; +template<> +struct regex_iterator_traits : const_pointer_iterator_traits{}; +template<> +struct regex_iterator_traits : pointer_iterator_traits{}; +template<> +struct regex_iterator_traits : const_pointer_iterator_traits{}; + +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +template<> +struct regex_iterator_traits : pointer_iterator_traits{}; +template<> +struct regex_iterator_traits : const_pointer_iterator_traits{}; +#endif + +#if defined(__SGI_STL_PORT) && defined(__STL_DEBUG) +template<> +struct regex_iterator_traits : pointer_iterator_traits{}; +template<> +struct regex_iterator_traits : const_pointer_iterator_traits{}; +#ifndef BOOST_NO_STD_WSTRING +template<> +struct regex_iterator_traits : pointer_iterator_traits{}; +template<> +struct regex_iterator_traits : const_pointer_iterator_traits{}; +#endif // BOOST_NO_WSTRING +#endif // stport + +#else + +template +struct regex_iterator_traits : public std::iterator_traits {}; + +#endif + +} // namespace re_detail +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/cutl/details/boost/regex/v4/match_flags.hpp b/cutl/details/boost/regex/v4/match_flags.hpp new file mode 100644 index 0000000..ac6fe21 --- /dev/null +++ b/cutl/details/boost/regex/v4/match_flags.hpp @@ -0,0 +1,138 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE match_flags.hpp + * VERSION see + * DESCRIPTION: Declares match_flags type. + */ + +#ifndef BOOST_REGEX_V4_MATCH_FLAGS +#define BOOST_REGEX_V4_MATCH_FLAGS + +#ifdef __cplusplus +# include +#endif + +#ifdef __cplusplus +namespace cutl_details_boost{ + namespace regex_constants{ +#endif + +typedef enum _match_flags +{ + match_default = 0, + match_not_bol = 1, /* first is not start of line */ + match_not_eol = match_not_bol << 1, /* last is not end of line */ + match_not_bob = match_not_eol << 1, /* first is not start of buffer */ + match_not_eob = match_not_bob << 1, /* last is not end of buffer */ + match_not_bow = match_not_eob << 1, /* first is not start of word */ + match_not_eow = match_not_bow << 1, /* last is not end of word */ + match_not_dot_newline = match_not_eow << 1, /* \n is not matched by '.' */ + match_not_dot_null = match_not_dot_newline << 1, /* '\0' is not matched by '.' */ + match_prev_avail = match_not_dot_null << 1, /* *--first is a valid expression */ + match_init = match_prev_avail << 1, /* internal use */ + match_any = match_init << 1, /* don't care what we match */ + match_not_null = match_any << 1, /* string can't be null */ + match_continuous = match_not_null << 1, /* each grep match must continue from */ + /* uninterupted from the previous one */ + match_partial = match_continuous << 1, /* find partial matches */ + + match_stop = match_partial << 1, /* stop after first match (grep) V3 only */ + match_not_initial_null = match_stop, /* don't match initial null, V4 only */ + match_all = match_stop << 1, /* must find the whole of input even if match_any is set */ + match_perl = match_all << 1, /* Use perl matching rules */ + match_posix = match_perl << 1, /* Use POSIX matching rules */ + match_nosubs = match_posix << 1, /* don't trap marked subs */ + match_extra = match_nosubs << 1, /* include full capture information for repeated captures */ + match_single_line = match_extra << 1, /* treat text as single line and ignor any \n's when matching ^ and $. */ + match_unused1 = match_single_line << 1, /* unused */ + match_unused2 = match_unused1 << 1, /* unused */ + match_unused3 = match_unused2 << 1, /* unused */ + match_max = match_unused3, + + format_perl = 0, /* perl style replacement */ + format_default = 0, /* ditto. */ + format_sed = match_max << 1, /* sed style replacement. */ + format_all = format_sed << 1, /* enable all extentions to sytax. */ + format_no_copy = format_all << 1, /* don't copy non-matching segments. */ + format_first_only = format_no_copy << 1, /* Only replace first occurance. */ + format_is_if = format_first_only << 1, /* internal use only. */ + format_literal = format_is_if << 1 /* treat string as a literal */ + +} match_flags; + +#if (defined(_MSC_VER) && (_MSC_VER < 1300)) || defined(__BORLANDC__) +typedef unsigned long match_flag_type; +#else +typedef match_flags match_flag_type; + + +#ifdef __cplusplus +inline match_flags operator&(match_flags m1, match_flags m2) +{ return static_cast(static_cast(m1) & static_cast(m2)); } +inline match_flags operator|(match_flags m1, match_flags m2) +{ return static_cast(static_cast(m1) | static_cast(m2)); } +inline match_flags operator^(match_flags m1, match_flags m2) +{ return static_cast(static_cast(m1) ^ static_cast(m2)); } +inline match_flags operator~(match_flags m1) +{ return static_cast(~static_cast(m1)); } +inline match_flags& operator&=(match_flags& m1, match_flags m2) +{ m1 = m1&m2; return m1; } +inline match_flags& operator|=(match_flags& m1, match_flags m2) +{ m1 = m1|m2; return m1; } +inline match_flags& operator^=(match_flags& m1, match_flags m2) +{ m1 = m1^m2; return m1; } +#endif +#endif + +#ifdef __cplusplus +} /* namespace regex_constants */ +/* + * import names into boost for backwards compatiblity: + */ +using regex_constants::match_flag_type; +using regex_constants::match_default; +using regex_constants::match_not_bol; +using regex_constants::match_not_eol; +using regex_constants::match_not_bob; +using regex_constants::match_not_eob; +using regex_constants::match_not_bow; +using regex_constants::match_not_eow; +using regex_constants::match_not_dot_newline; +using regex_constants::match_not_dot_null; +using regex_constants::match_prev_avail; +/* using regex_constants::match_init; */ +using regex_constants::match_any; +using regex_constants::match_not_null; +using regex_constants::match_continuous; +using regex_constants::match_partial; +/*using regex_constants::match_stop; */ +using regex_constants::match_all; +using regex_constants::match_perl; +using regex_constants::match_posix; +using regex_constants::match_nosubs; +using regex_constants::match_extra; +using regex_constants::match_single_line; +/*using regex_constants::match_max; */ +using regex_constants::format_all; +using regex_constants::format_sed; +using regex_constants::format_perl; +using regex_constants::format_default; +using regex_constants::format_no_copy; +using regex_constants::format_first_only; +/*using regex_constants::format_is_if;*/ + +} /* namespace cutl_details_boost */ +#endif /* __cplusplus */ +#endif /* include guard */ + diff --git a/cutl/details/boost/regex/v4/match_results.hpp b/cutl/details/boost/regex/v4/match_results.hpp new file mode 100644 index 0000000..4cdeb3c --- /dev/null +++ b/cutl/details/boost/regex/v4/match_results.hpp @@ -0,0 +1,699 @@ +/* + * + * Copyright (c) 1998-2009 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE match_results.cpp + * VERSION see + * DESCRIPTION: Declares template class match_results. + */ + +#ifndef BOOST_REGEX_V4_MATCH_RESULTS_HPP +#define BOOST_REGEX_V4_MATCH_RESULTS_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable : 4251 4231 4660) +#endif + +namespace re_detail{ + +class named_subexpressions; + +} + +template +class match_results +{ +private: +#ifndef BOOST_NO_STD_ALLOCATOR + typedef std::vector, Allocator> vector_type; +#else + typedef std::vector > vector_type; +#endif +public: + typedef sub_match value_type; +#if !defined(BOOST_NO_STD_ALLOCATOR) && !(defined(BOOST_MSVC) && defined(_STLPORT_VERSION)) + typedef typename Allocator::const_reference const_reference; +#else + typedef const value_type& const_reference; +#endif + typedef const_reference reference; + typedef typename vector_type::const_iterator const_iterator; + typedef const_iterator iterator; + typedef typename re_detail::regex_iterator_traits< + BidiIterator>::difference_type difference_type; + typedef typename Allocator::size_type size_type; + typedef Allocator allocator_type; + typedef typename re_detail::regex_iterator_traits< + BidiIterator>::value_type char_type; + typedef std::basic_string string_type; + typedef re_detail::named_subexpressions named_sub_type; + + // construct/copy/destroy: + explicit match_results(const Allocator& a = Allocator()) +#ifndef BOOST_NO_STD_ALLOCATOR + : m_subs(a), m_base(), m_last_closed_paren(0), m_is_singular(true) {} +#else + : m_subs(), m_base(), m_last_closed_paren(0), m_is_singular(true) { (void)a; } +#endif + match_results(const match_results& m) + : m_subs(m.m_subs), m_named_subs(m.m_named_subs), m_last_closed_paren(m.m_last_closed_paren), m_is_singular(m.m_is_singular) + { + if(!m_is_singular) + { + m_base = m.m_base; + m_null = m.m_null; + } + } + match_results& operator=(const match_results& m) + { + m_subs = m.m_subs; + m_named_subs = m.m_named_subs; + m_last_closed_paren = m.m_last_closed_paren; + m_is_singular = m.m_is_singular; + if(!m_is_singular) + { + m_base = m.m_base; + m_null = m.m_null; + } + return *this; + } + ~match_results(){} + + // size: + size_type size() const + { return empty() ? 0 : m_subs.size() - 2; } + size_type max_size() const + { return m_subs.max_size(); } + bool empty() const + { return m_subs.size() < 2; } + // element access: + difference_type length(int sub = 0) const + { + if(m_is_singular) + raise_logic_error(); + sub += 2; + if((sub < (int)m_subs.size()) && (sub > 0)) + return m_subs[sub].length(); + return 0; + } + difference_type length(const char_type* sub) const + { + if(m_is_singular) + raise_logic_error(); + const char_type* sub_end = sub; + while(*sub_end) ++sub_end; + return length(named_subexpression_index(sub, sub_end)); + } + template + difference_type length(const charT* sub) const + { + if(m_is_singular) + raise_logic_error(); + const charT* sub_end = sub; + while(*sub_end) ++sub_end; + return length(named_subexpression_index(sub, sub_end)); + } + template + difference_type length(const std::basic_string& sub) const + { + return length(sub.c_str()); + } + difference_type position(size_type sub = 0) const + { + if(m_is_singular) + raise_logic_error(); + sub += 2; + if(sub < m_subs.size()) + { + const sub_match& s = m_subs[sub]; + if(s.matched || (sub == 2)) + { + return ::cutl_details_boost::re_detail::distance((BidiIterator)(m_base), (BidiIterator)(s.first)); + } + } + return ~static_cast(0); + } + difference_type position(const char_type* sub) const + { + const char_type* sub_end = sub; + while(*sub_end) ++sub_end; + return position(named_subexpression_index(sub, sub_end)); + } + template + difference_type position(const charT* sub) const + { + const charT* sub_end = sub; + while(*sub_end) ++sub_end; + return position(named_subexpression_index(sub, sub_end)); + } + template + difference_type position(const std::basic_string& sub) const + { + return position(sub.c_str()); + } + string_type str(int sub = 0) const + { + if(m_is_singular) + raise_logic_error(); + sub += 2; + string_type result; + if(sub < (int)m_subs.size() && (sub > 0)) + { + const sub_match& s = m_subs[sub]; + if(s.matched) + { + result = s.str(); + } + } + return result; + } + string_type str(const char_type* sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const std::basic_string& sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const charT* sub) const + { + return (*this)[sub].str(); + } + template + string_type str(const std::basic_string& sub) const + { + return (*this)[sub].str(); + } + const_reference operator[](int sub) const + { + if(m_is_singular && m_subs.empty()) + raise_logic_error(); + sub += 2; + if(sub < (int)m_subs.size() && (sub >= 0)) + { + return m_subs[sub]; + } + return m_null; + } + // + // Named sub-expressions: + // + const_reference named_subexpression(const char_type* i, const char_type* j) const + { + // + // Scan for the leftmost *matched* subexpression with the specified named: + // + if(m_is_singular) + raise_logic_error(); + re_detail::named_subexpressions::range_type r = m_named_subs->equal_range(i, j); + while((r.first != r.second) && ((*this)[r.first->index].matched == false)) + ++r.first; + return r.first != r.second ? (*this)[r.first->index] : m_null; + } + template + const_reference named_subexpression(const charT* i, const charT* j) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(i == j) + return m_null; + std::vector s; + while(i != j) + s.insert(s.end(), *i++); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + int named_subexpression_index(const char_type* i, const char_type* j) const + { + // + // Scan for the leftmost *matched* subexpression with the specified named. + // If none found then return the leftmost expression with that name, + // otherwise an invalid index: + // + if(m_is_singular) + raise_logic_error(); + re_detail::named_subexpressions::range_type s, r; + s = r = m_named_subs->equal_range(i, j); + while((r.first != r.second) && ((*this)[r.first->index].matched == false)) + ++r.first; + if(r.first == r.second) + r = s; + return r.first != r.second ? r.first->index : -20; + } + template + int named_subexpression_index(const charT* i, const charT* j) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(i == j) + return -20; + std::vector s; + while(i != j) + s.insert(s.end(), *i++); + return named_subexpression_index(&*s.begin(), &*s.begin() + s.size()); + } + template + const_reference operator[](const std::basic_string& s) const + { + return named_subexpression(s.c_str(), s.c_str() + s.size()); + } + const_reference operator[](const char_type* p) const + { + const char_type* e = p; + while(*e) ++e; + return named_subexpression(p, e); + } + + template + const_reference operator[](const charT* p) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(*p == 0) + return m_null; + std::vector s; + while(*p) + s.insert(s.end(), *p++); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + template + const_reference operator[](const std::basic_string& ns) const + { + BOOST_STATIC_ASSERT(sizeof(charT) <= sizeof(char_type)); + if(ns.empty()) + return m_null; + std::vector s; + for(unsigned i = 0; i < ns.size(); ++i) + s.insert(s.end(), ns[i]); + return named_subexpression(&*s.begin(), &*s.begin() + s.size()); + } + + const_reference prefix() const + { + if(m_is_singular) + raise_logic_error(); + return (*this)[-1]; + } + + const_reference suffix() const + { + if(m_is_singular) + raise_logic_error(); + return (*this)[-2]; + } + const_iterator begin() const + { + return (m_subs.size() > 2) ? (m_subs.begin() + 2) : m_subs.end(); + } + const_iterator end() const + { + return m_subs.end(); + } + // format: + template + OutputIterator format(OutputIterator out, + Functor fmt, + match_flag_type flags = format_default) const + { + if(m_is_singular) + raise_logic_error(); + typedef typename re_detail::compute_functor_type, OutputIterator>::type F; + F func(fmt); + return func(*this, out, flags); + } + template + string_type format(Functor fmt, match_flag_type flags = format_default) const + { + if(m_is_singular) + raise_logic_error(); + std::basic_string result; + re_detail::string_out_iterator > i(result); + + typedef typename re_detail::compute_functor_type, re_detail::string_out_iterator > >::type F; + F func(fmt); + + func(*this, i, flags); + return result; + } + // format with locale: + template + OutputIterator format(OutputIterator out, + Functor fmt, + match_flag_type flags, + const RegexT& re) const + { + if(m_is_singular) + raise_logic_error(); + typedef ::cutl_details_boost::regex_traits_wrapper traits_type; + typedef typename re_detail::compute_functor_type, OutputIterator, traits_type>::type F; + F func(fmt); + return func(*this, out, flags, re.get_traits()); + } + template + string_type format(Functor fmt, + match_flag_type flags, + const RegexT& re) const + { + if(m_is_singular) + raise_logic_error(); + typedef ::cutl_details_boost::regex_traits_wrapper traits_type; + std::basic_string result; + re_detail::string_out_iterator > i(result); + + typedef typename re_detail::compute_functor_type, re_detail::string_out_iterator >, traits_type >::type F; + F func(fmt); + + func(*this, i, flags, re.get_traits()); + return result; + } + + const_reference get_last_closed_paren()const + { + if(m_is_singular) + raise_logic_error(); + return m_last_closed_paren == 0 ? m_null : (*this)[m_last_closed_paren]; + } + + allocator_type get_allocator() const + { +#ifndef BOOST_NO_STD_ALLOCATOR + return m_subs.get_allocator(); +#else + return allocator_type(); +#endif + } + void swap(match_results& that) + { + std::swap(m_subs, that.m_subs); + std::swap(m_named_subs, that.m_named_subs); + std::swap(m_last_closed_paren, that.m_last_closed_paren); + if(m_is_singular) + { + if(!that.m_is_singular) + { + m_base = that.m_base; + m_null = that.m_null; + } + } + else if(that.m_is_singular) + { + that.m_base = m_base; + that.m_null = m_null; + } + else + { + std::swap(m_base, that.m_base); + std::swap(m_null, that.m_null); + } + std::swap(m_is_singular, that.m_is_singular); + } + bool operator==(const match_results& that)const + { + if(m_is_singular) + { + return that.m_is_singular; + } + else if(that.m_is_singular) + { + return false; + } + return (m_subs == that.m_subs) && (m_base == that.m_base) && (m_last_closed_paren == that.m_last_closed_paren); + } + bool operator!=(const match_results& that)const + { return !(*this == that); } + +#ifdef BOOST_REGEX_MATCH_EXTRA + typedef typename sub_match::capture_sequence_type capture_sequence_type; + + const capture_sequence_type& captures(int i)const + { + if(m_is_singular) + raise_logic_error(); + return (*this)[i].captures(); + } +#endif + + // + // private access functions: + void BOOST_REGEX_CALL set_second(BidiIterator i) + { + BOOST_ASSERT(m_subs.size() > 2); + m_subs[2].second = i; + m_subs[2].matched = true; + m_subs[0].first = i; + m_subs[0].matched = (m_subs[0].first != m_subs[0].second); + m_null.first = i; + m_null.second = i; + m_null.matched = false; + m_is_singular = false; + } + + void BOOST_REGEX_CALL set_second(BidiIterator i, size_type pos, bool m = true, bool escape_k = false) + { + if(pos) + m_last_closed_paren = static_cast(pos); + pos += 2; + BOOST_ASSERT(m_subs.size() > pos); + m_subs[pos].second = i; + m_subs[pos].matched = m; + if((pos == 2) && !escape_k) + { + m_subs[0].first = i; + m_subs[0].matched = (m_subs[0].first != m_subs[0].second); + m_null.first = i; + m_null.second = i; + m_null.matched = false; + m_is_singular = false; + } + } + void BOOST_REGEX_CALL set_size(size_type n, BidiIterator i, BidiIterator j) + { + value_type v(j); + size_type len = m_subs.size(); + if(len > n + 2) + { + m_subs.erase(m_subs.begin()+n+2, m_subs.end()); + std::fill(m_subs.begin(), m_subs.end(), v); + } + else + { + std::fill(m_subs.begin(), m_subs.end(), v); + if(n+2 != len) + m_subs.insert(m_subs.end(), n+2-len, v); + } + m_subs[1].first = i; + m_last_closed_paren = 0; + } + void BOOST_REGEX_CALL set_base(BidiIterator pos) + { + m_base = pos; + } + BidiIterator base()const + { + return m_base; + } + void BOOST_REGEX_CALL set_first(BidiIterator i) + { + BOOST_ASSERT(m_subs.size() > 2); + // set up prefix: + m_subs[1].second = i; + m_subs[1].matched = (m_subs[1].first != i); + // set up $0: + m_subs[2].first = i; + // zero out everything else: + for(size_type n = 3; n < m_subs.size(); ++n) + { + m_subs[n].first = m_subs[n].second = m_subs[0].second; + m_subs[n].matched = false; + } + } + void BOOST_REGEX_CALL set_first(BidiIterator i, size_type pos, bool escape_k = false) + { + BOOST_ASSERT(pos+2 < m_subs.size()); + if(pos || escape_k) + { + m_subs[pos+2].first = i; + if(escape_k) + { + m_subs[1].second = i; + m_subs[1].matched = (m_subs[1].first != m_subs[1].second); + } + } + else + set_first(i); + } + void BOOST_REGEX_CALL maybe_assign(const match_results& m); + + void BOOST_REGEX_CALL set_named_subs(cutl_details_boost::shared_ptr subs) + { + m_named_subs = subs; + } + +private: + // + // Error handler called when an uninitialized match_results is accessed: + // + static void raise_logic_error() + { + std::logic_error e("Attempt to access an uninitialzed cutl_details_boost::match_results<> class."); + cutl_details_boost::throw_exception(e); + } + + + vector_type m_subs; // subexpressions + BidiIterator m_base; // where the search started from + sub_match m_null; // a null match + cutl_details_boost::shared_ptr m_named_subs; // Shared copy of named subs in the regex object + int m_last_closed_paren; // Last ) to be seen - used for formatting + bool m_is_singular; // True if our stored iterators are singular +}; + +template +void BOOST_REGEX_CALL match_results::maybe_assign(const match_results& m) +{ + if(m_is_singular) + { + *this = m; + return; + } + const_iterator p1, p2; + p1 = begin(); + p2 = m.begin(); + // + // Distances are measured from the start of *this* match, unless this isn't + // a valid match in which case we use the start of the whole sequence. Note that + // no subsequent match-candidate can ever be to the left of the first match found. + // This ensures that when we are using bidirectional iterators, that distances + // measured are as short as possible, and therefore as efficient as possible + // to compute. Finally note that we don't use the "matched" data member to test + // whether a sub-expression is a valid match, because partial matches set this + // to false for sub-expression 0. + // + BidiIterator l_end = this->suffix().second; + BidiIterator l_base = (p1->first == l_end) ? this->prefix().first : (*this)[0].first; + difference_type len1 = 0; + difference_type len2 = 0; + difference_type base1 = 0; + difference_type base2 = 0; + std::size_t i; + for(i = 0; i < size(); ++i, ++p1, ++p2) + { + // + // Leftmost takes priority over longest; handle special cases + // where distances need not be computed first (an optimisation + // for bidirectional iterators: ensure that we don't accidently + // compute the length of the whole sequence, as this can be really + // expensive). + // + if(p1->first == l_end) + { + if(p2->first != l_end) + { + // p2 must be better than p1, and no need to calculate + // actual distances: + base1 = 1; + base2 = 0; + break; + } + else + { + // *p1 and *p2 are either unmatched or match end-of sequence, + // either way no need to calculate distances: + if((p1->matched == false) && (p2->matched == true)) + break; + if((p1->matched == true) && (p2->matched == false)) + return; + continue; + } + } + else if(p2->first == l_end) + { + // p1 better than p2, and no need to calculate distances: + return; + } + base1 = ::cutl_details_boost::re_detail::distance(l_base, p1->first); + base2 = ::cutl_details_boost::re_detail::distance(l_base, p2->first); + BOOST_ASSERT(base1 >= 0); + BOOST_ASSERT(base2 >= 0); + if(base1 < base2) return; + if(base2 < base1) break; + + len1 = ::cutl_details_boost::re_detail::distance((BidiIterator)p1->first, (BidiIterator)p1->second); + len2 = ::cutl_details_boost::re_detail::distance((BidiIterator)p2->first, (BidiIterator)p2->second); + BOOST_ASSERT(len1 >= 0); + BOOST_ASSERT(len2 >= 0); + if((len1 != len2) || ((p1->matched == false) && (p2->matched == true))) + break; + if((p1->matched == true) && (p2->matched == false)) + return; + } + if(i == size()) + return; + if(base2 < base1) + *this = m; + else if((len2 > len1) || ((p1->matched == false) && (p2->matched == true)) ) + *this = m; +} + +template +void swap(match_results& a, match_results& b) +{ + a.swap(b); +} + +#ifndef BOOST_NO_STD_LOCALE +template +std::basic_ostream& + operator << (std::basic_ostream& os, + const match_results& s) +{ + return (os << s.str()); +} +#else +template +std::ostream& operator << (std::ostream& os, + const match_results& s) +{ + return (os << s.str()); +} +#endif + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + + diff --git a/cutl/details/boost/regex/v4/mem_block_cache.hpp b/cutl/details/boost/regex/v4/mem_block_cache.hpp new file mode 100644 index 0000000..98e7e8a --- /dev/null +++ b/cutl/details/boost/regex/v4/mem_block_cache.hpp @@ -0,0 +1,99 @@ + /* + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE mem_block_cache.hpp + * VERSION see + * DESCRIPTION: memory block cache used by the non-recursive matcher. + */ + +#ifndef BOOST_REGEX_V4_MEM_BLOCK_CACHE_HPP +#define BOOST_REGEX_V4_MEM_BLOCK_CACHE_HPP + +#include +#ifdef BOOST_HAS_THREADS +#include +#endif + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +struct mem_block_node +{ + mem_block_node* next; +}; + +struct mem_block_cache +{ + // this member has to be statically initialsed: + mem_block_node* next; + unsigned cached_blocks; +#ifdef BOOST_HAS_THREADS + cutl_details_boost::static_mutex mut; +#endif + + ~mem_block_cache() + { + while(next) + { + mem_block_node* old = next; + next = next->next; + ::operator delete(old); + } + } + void* get() + { +#ifdef BOOST_HAS_THREADS + cutl_details_boost::static_mutex::scoped_lock g(mut); +#endif + if(next) + { + mem_block_node* result = next; + next = next->next; + --cached_blocks; + return result; + } + return ::operator new(BOOST_REGEX_BLOCKSIZE); + } + void put(void* p) + { +#ifdef BOOST_HAS_THREADS + cutl_details_boost::static_mutex::scoped_lock g(mut); +#endif + if(cached_blocks >= BOOST_REGEX_MAX_CACHE_BLOCKS) + { + ::operator delete(p); + } + else + { + mem_block_node* old = static_cast(p); + old->next = next; + next = old; + ++cached_blocks; + } + } +}; + +extern mem_block_cache block_cache; + +} +} // namespace cutl_details_boost + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + +#endif + diff --git a/cutl/details/boost/regex/v4/perl_matcher.hpp b/cutl/details/boost/regex/v4/perl_matcher.hpp new file mode 100644 index 0000000..8da8bed --- /dev/null +++ b/cutl/details/boost/regex/v4/perl_matcher.hpp @@ -0,0 +1,584 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + +#ifndef BOOST_REGEX_MATCHER_HPP +#define BOOST_REGEX_MATCHER_HPP + +#include + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +// +// error checking API: +// +BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(cutl_details_boost::regex_constants::syntax_option_type ef, match_flag_type mf); +// +// function can_start: +// +template +inline bool can_start(charT c, const unsigned char* map, unsigned char mask) +{ + return ((c < static_cast(0)) ? true : ((c >= static_cast(1 << CHAR_BIT)) ? true : map[c] & mask)); +} +inline bool can_start(char c, const unsigned char* map, unsigned char mask) +{ + return map[(unsigned char)c] & mask; +} +inline bool can_start(signed char c, const unsigned char* map, unsigned char mask) +{ + return map[(unsigned char)c] & mask; +} +inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask) +{ + return map[c] & mask; +} +inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask) +{ + return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask); +} +#if !defined(__hpux) && !defined(__WINSCW__)// WCHAR_MIN not usable in pp-directives. +#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) +inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask) +{ + return ((c >= static_cast(1u << CHAR_BIT)) ? true : map[c] & mask); +} +#endif +#endif +#if !defined(BOOST_NO_INTRINSIC_WCHAR_T) +inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask) +{ + return (((c >= static_cast(1u << CHAR_BIT)) ? true : map[c] & mask)); +} +#endif + + +// +// Unfortunately Rogue Waves standard library appears to have a bug +// in std::basic_string::compare that results in eroneous answers +// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version +// 0x020101) the test case was: +// {39135,0} < {0xff,0} +// which succeeds when it should not. +// +#ifndef _RWSTD_VER +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310) +template +inline int string_compare(const std::basic_string& s, const C* p) +{ + if(0 == *p) + { + if(s.empty() || ((s.size() == 1) && (s[0] == 0))) + return 0; + } + return s.compare(p); +} +#endif +#else +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310) +template +inline int string_compare(const std::basic_string& s, const C* p) +{ + if(0 == *p) + { + if(s.empty() || ((s.size() == 1) && (s[0] == 0))) + return 0; + } + return s.compare(p); +} +#endif +inline int string_compare(const std::string& s, const char* p) +{ return std::strcmp(s.c_str(), p); } +# ifndef BOOST_NO_WREGEX +inline int string_compare(const std::wstring& s, const wchar_t* p) +{ return std::wcscmp(s.c_str(), p); } +#endif +#endif +template +inline int string_compare(const Seq& s, const C* p) +{ + std::size_t i = 0; + while((i < s.size()) && (p[i] == s[i])) + { + ++i; + } + return (i == s.size()) ? -p[i] : s[i] - p[i]; +} +# define STR_COMP(s,p) string_compare(s,p) + +template +inline const charT* re_skip_past_null(const charT* p) +{ + while (*p != static_cast(0)) ++p; + return ++p; +} + +template +iterator BOOST_REGEX_CALL re_is_set_member(iterator next, + iterator last, + const re_set_long* set_, + const regex_data& e, bool icase) +{ + const charT* p = reinterpret_cast(set_+1); + iterator ptr; + unsigned int i; + //bool icase = e.m_flags & regex_constants::icase; + + if(next == last) return next; + + typedef typename traits_type::string_type traits_string_type; + const ::cutl_details_boost::regex_traits_wrapper& traits_inst = *(e.m_ptraits); + + // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never + // referenced + (void)traits_inst; + + // try and match a single character, could be a multi-character + // collating element... + for(i = 0; i < set_->csingles; ++i) + { + ptr = next; + if(*p == static_cast(0)) + { + // treat null string as special case: + if(traits_inst.translate(*ptr, icase) != *p) + { + while(*p == static_cast(0))++p; + continue; + } + return set_->isnot ? next : (ptr == next) ? ++next : ptr; + } + else + { + while(*p && (ptr != last)) + { + if(traits_inst.translate(*ptr, icase) != *p) + break; + ++p; + ++ptr; + } + + if(*p == static_cast(0)) // if null we've matched + return set_->isnot ? next : (ptr == next) ? ++next : ptr; + + p = re_skip_past_null(p); // skip null + } + } + + charT col = traits_inst.translate(*next, icase); + + + if(set_->cranges || set_->cequivalents) + { + traits_string_type s1; + // + // try and match a range, NB only a single character can match + if(set_->cranges) + { + if((e.m_flags & regex_constants::collate) == 0) + s1.assign(1, col); + else + { + charT a[2] = { col, charT(0), }; + s1 = traits_inst.transform(a, a + 1); + } + for(i = 0; i < set_->cranges; ++i) + { + if(STR_COMP(s1, p) >= 0) + { + do{ ++p; }while(*p); + ++p; + if(STR_COMP(s1, p) <= 0) + return set_->isnot ? next : ++next; + } + else + { + // skip first string + do{ ++p; }while(*p); + ++p; + } + // skip second string + do{ ++p; }while(*p); + ++p; + } + } + // + // try and match an equivalence class, NB only a single character can match + if(set_->cequivalents) + { + charT a[2] = { col, charT(0), }; + s1 = traits_inst.transform_primary(a, a +1); + for(i = 0; i < set_->cequivalents; ++i) + { + if(STR_COMP(s1, p) == 0) + return set_->isnot ? next : ++next; + // skip string + do{ ++p; }while(*p); + ++p; + } + } + } + if(traits_inst.isctype(col, set_->cclasses) == true) + return set_->isnot ? next : ++next; + if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false)) + return set_->isnot ? next : ++next; + return set_->isnot ? ++next : next; +} + +template +class repeater_count +{ + repeater_count** stack; + repeater_count* next; + int state_id; + std::size_t count; // the number of iterations so far + BidiIterator start_pos; // where the last repeat started +public: + repeater_count(repeater_count** s) + { + stack = s; + next = 0; + state_id = -1; + count = 0; + } + repeater_count(int i, repeater_count** s, BidiIterator start) + : start_pos(start) + { + state_id = i; + stack = s; + next = *stack; + *stack = this; + if(state_id > next->state_id) + count = 0; + else + { + repeater_count* p = next; + while(p && (p->state_id != state_id)) + p = p->next; + if(p) + { + count = p->count; + start_pos = p->start_pos; + } + else + count = 0; + } + } + ~repeater_count() + { + if(next) + *stack = next; + } + std::size_t get_count() { return count; } + int get_id() { return state_id; } + std::size_t operator++() { return ++count; } + bool check_null_repeat(const BidiIterator& pos, std::size_t max) + { + // this is called when we are about to start a new repeat, + // if the last one was NULL move our count to max, + // otherwise save the current position. + bool result = (count == 0) ? false : (pos == start_pos); + if(result) + count = max; + else + start_pos = pos; + return result; + } +}; + +struct saved_state; + +enum saved_state_type +{ + saved_type_end = 0, + saved_type_paren = 1, + saved_type_recurse = 2, + saved_type_assertion = 3, + saved_state_alt = 4, + saved_state_repeater_count = 5, + saved_state_extra_block = 6, + saved_state_greedy_single_repeat = 7, + saved_state_rep_slow_dot = 8, + saved_state_rep_fast_dot = 9, + saved_state_rep_char = 10, + saved_state_rep_short_set = 11, + saved_state_rep_long_set = 12, + saved_state_non_greedy_long_repeat = 13, + saved_state_count = 14 +}; + +template +struct recursion_info +{ + typedef typename Results::value_type value_type; + typedef typename value_type::iterator iterator; + int idx; + const re_syntax_base* preturn_address; + Results results; + repeater_count* repeater_stack; +}; + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable : 4251 4231 4660) +#endif + +template +class perl_matcher +{ +public: + typedef typename traits::char_type char_type; + typedef perl_matcher self_type; + typedef bool (self_type::*matcher_proc_type)(void); + typedef std::size_t traits_size_type; + typedef typename is_byte::width_type width_type; + typedef typename regex_iterator_traits::difference_type difference_type; + typedef match_results results_type; + + perl_matcher(BidiIterator first, BidiIterator end, + match_results& what, + const basic_regex& e, + match_flag_type f, + BidiIterator l_base) + : m_result(what), base(first), last(end), + position(first), backstop(l_base), re(e), traits_inst(e.get_traits()), + m_independent(false), next_count(&rep_obj), rep_obj(&next_count) + { + construct_init(e, f); + } + + bool match(); + bool find(); + + void setf(match_flag_type f) + { m_match_flags |= f; } + void unsetf(match_flag_type f) + { m_match_flags &= ~f; } + +private: + void construct_init(const basic_regex& e, match_flag_type f); + + bool find_imp(); + bool match_imp(); +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD + typedef bool (perl_matcher::*protected_proc_type)(); + bool protected_call(protected_proc_type); +#endif + void estimate_max_state_count(std::random_access_iterator_tag*); + void estimate_max_state_count(void*); + bool match_prefix(); + bool match_all_states(); + + // match procs, stored in s_match_vtable: + bool match_startmark(); + bool match_endmark(); + bool match_literal(); + bool match_start_line(); + bool match_end_line(); + bool match_wild(); + bool match_match(); + bool match_word_boundary(); + bool match_within_word(); + bool match_word_start(); + bool match_word_end(); + bool match_buffer_start(); + bool match_buffer_end(); + bool match_backref(); + bool match_long_set(); + bool match_set(); + bool match_jump(); + bool match_alt(); + bool match_rep(); + bool match_combining(); + bool match_soft_buffer_end(); + bool match_restart_continue(); + bool match_long_set_repeat(); + bool match_set_repeat(); + bool match_char_repeat(); + bool match_dot_repeat_fast(); + bool match_dot_repeat_slow(); + bool match_dot_repeat_dispatch() + { + return ::cutl_details_boost::is_random_access_iterator::value ? match_dot_repeat_fast() : match_dot_repeat_slow(); + } + bool match_backstep(); + bool match_assert_backref(); + bool match_toggle_case(); +#ifdef BOOST_REGEX_RECURSIVE + bool backtrack_till_match(std::size_t count); +#endif + bool match_recursion(); + + // find procs stored in s_find_vtable: + bool find_restart_any(); + bool find_restart_word(); + bool find_restart_line(); + bool find_restart_buf(); + bool find_restart_lit(); + +private: + // final result structure to be filled in: + match_results& m_result; + // temporary result for POSIX matches: + scoped_ptr > m_temp_match; + // pointer to actual result structure to fill in: + match_results* m_presult; + // start of sequence being searched: + BidiIterator base; + // end of sequence being searched: + BidiIterator last; + // current character being examined: + BidiIterator position; + // where to restart next search after failed match attempt: + BidiIterator restart; + // where the current search started from, acts as base for $` during grep: + BidiIterator search_base; + // how far we can go back when matching lookbehind: + BidiIterator backstop; + // the expression being examined: + const basic_regex& re; + // the expression's traits class: + const ::cutl_details_boost::regex_traits_wrapper& traits_inst; + // the next state in the machine being matched: + const re_syntax_base* pstate; + // matching flags in use: + match_flag_type m_match_flags; + // how many states we have examined so far: + std::ptrdiff_t state_count; + // max number of states to examine before giving up: + std::ptrdiff_t max_state_count; + // whether we should ignore case or not: + bool icase; + // set to true when (position == last), indicates that we may have a partial match: + bool m_has_partial_match; + // set to true whenever we get a match: + bool m_has_found_match; + // set to true whenever we're inside an independent sub-expression: + bool m_independent; + // the current repeat being examined: + repeater_count* next_count; + // the first repeat being examined (top of linked list): + repeater_count rep_obj; + // the mask to pass when matching word boundaries: + typename traits::char_class_type m_word_mask; + // the bitmask to use when determining whether a match_any matches a newline or not: + unsigned char match_any_mask; + // recursion information: + std::vector > recursion_stack; + +#ifdef BOOST_REGEX_NON_RECURSIVE + // + // additional members for non-recursive version: + // + typedef bool (self_type::*unwind_proc_type)(bool); + + void extend_stack(); + bool unwind(bool); + bool unwind_end(bool); + bool unwind_paren(bool); + bool unwind_recursion_stopper(bool); + bool unwind_assertion(bool); + bool unwind_alt(bool); + bool unwind_repeater_counter(bool); + bool unwind_extra_block(bool); + bool unwind_greedy_single_repeat(bool); + bool unwind_slow_dot_repeat(bool); + bool unwind_fast_dot_repeat(bool); + bool unwind_char_repeat(bool); + bool unwind_short_set_repeat(bool); + bool unwind_long_set_repeat(bool); + bool unwind_non_greedy_repeat(bool); + bool unwind_recursion(bool); + bool unwind_recursion_pop(bool); + void destroy_single_repeat(); + void push_matched_paren(int index, const sub_match& sub); + void push_recursion_stopper(); + void push_assertion(const re_syntax_base* ps, bool positive); + void push_alt(const re_syntax_base* ps); + void push_repeater_count(int i, repeater_count** s); + void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id); + void push_non_greedy_repeat(const re_syntax_base* ps); + void push_recursion(int idx, const re_syntax_base* p, results_type* presults); + void push_recursion_pop(); + + // pointer to base of stack: + saved_state* m_stack_base; + // pointer to current stack position: + saved_state* m_backup_state; + // determines what value to return when unwinding from recursion, + // allows for mixed recursive/non-recursive algorithm: + bool m_recursive_result; + // how many memory blocks have we used up?: + unsigned used_block_count; +#endif + + // these operations aren't allowed, so are declared private, + // bodies are provided to keep explicit-instantiation requests happy: + perl_matcher& operator=(const perl_matcher&) + { + return *this; + } + perl_matcher(const perl_matcher& that) + : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {} +}; + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace re_detail + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +// +// include the implementation of perl_matcher: +// +#ifdef BOOST_REGEX_RECURSIVE +#include +#else +#include +#endif +// this one has to be last: +#include + +#endif + diff --git a/cutl/details/boost/regex/v4/perl_matcher_common.hpp b/cutl/details/boost/regex/v4/perl_matcher_common.hpp new file mode 100644 index 0000000..d7c9a85 --- /dev/null +++ b/cutl/details/boost/regex/v4/perl_matcher_common.hpp @@ -0,0 +1,990 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE perl_matcher_common.cpp + * VERSION see + * DESCRIPTION: Definitions of perl_matcher member functions that are + * common to both the recursive and non-recursive versions. + */ + +#ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP +#define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef __BORLANDC__ +# pragma option push -w-8008 -w-8066 +#endif +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +template +void perl_matcher::construct_init(const basic_regex& e, match_flag_type f) +{ + typedef typename regex_iterator_traits::iterator_category category; + typedef typename basic_regex::flag_type expression_flag_type; + + if(e.empty()) + { + // precondition failure: e is not a valid regex. + std::invalid_argument ex("Invalid regular expression object"); + cutl_details_boost::throw_exception(ex); + } + pstate = 0; + m_match_flags = f; + estimate_max_state_count(static_cast(0)); + expression_flag_type re_f = re.flags(); + icase = re_f & regex_constants::icase; + if(!(m_match_flags & (match_perl|match_posix))) + { + if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0) + m_match_flags |= match_perl; + else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex)) + m_match_flags |= match_perl; + else + m_match_flags |= match_posix; + } + if(m_match_flags & match_posix) + { + m_temp_match.reset(new match_results()); + m_presult = m_temp_match.get(); + } + else + m_presult = &m_result; +#ifdef BOOST_REGEX_NON_RECURSIVE + m_stack_base = 0; + m_backup_state = 0; +#endif + // find the value to use for matching word boundaries: + m_word_mask = re.get_data().m_word_mask; + // find bitmask to use for matching '.': + match_any_mask = static_cast((f & match_not_dot_newline) ? re_detail::test_not_newline : re_detail::test_newline); +} + +template +void perl_matcher::estimate_max_state_count(std::random_access_iterator_tag*) +{ + // + // How many states should we allow our machine to visit before giving up? + // This is a heuristic: it takes the greater of O(N^2) and O(NS^2) + // where N is the length of the string, and S is the number of states + // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2) + // but these take unreasonably amounts of time to bale out in pathological + // cases. + // + // Calculate NS^2 first: + // + static const std::ptrdiff_t k = 100000; + std::ptrdiff_t dist = cutl_details_boost::re_detail::distance(base, last); + if(dist == 0) + dist = 1; + std::ptrdiff_t states = re.size(); + if(states == 0) + states = 1; + states *= states; + if((std::numeric_limits::max)() / dist < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states *= dist; + if((std::numeric_limits::max)() - k < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states += k; + + max_state_count = states; + + // + // Now calculate N^2: + // + states = dist; + if((std::numeric_limits::max)() / dist < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states *= dist; + if((std::numeric_limits::max)() - k < states) + { + max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits::max)() - 2); + return; + } + states += k; + // + // N^2 can be a very large number indeed, to prevent things getting out + // of control, cap the max states: + // + if(states > BOOST_REGEX_MAX_STATE_COUNT) + states = BOOST_REGEX_MAX_STATE_COUNT; + // + // If (the possibly capped) N^2 is larger than our first estimate, + // use this instead: + // + if(states > max_state_count) + max_state_count = states; +} + +template +inline void perl_matcher::estimate_max_state_count(void*) +{ + // we don't know how long the sequence is: + max_state_count = BOOST_REGEX_MAX_STATE_COUNT; +} + +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD +template +inline bool perl_matcher::protected_call( + protected_proc_type proc) +{ + ::cutl_details_boost::re_detail::concrete_protected_call + > + obj(this, proc); + return obj.execute(); + +} +#endif + +template +inline bool perl_matcher::match() +{ +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD + return protected_call(&perl_matcher::match_imp); +#else + return match_imp(); +#endif +} + +template +bool perl_matcher::match_imp() +{ + // initialise our stack if we are non-recursive: +#ifdef BOOST_REGEX_NON_RECURSIVE + save_state_init init(&m_stack_base, &m_backup_state); + used_block_count = BOOST_REGEX_MAX_BLOCKS; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif +#endif + + // reset our state machine: + position = base; + search_base = base; + state_count = 0; + m_match_flags |= regex_constants::match_all; + m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last); + m_presult->set_base(base); + m_presult->set_named_subs(this->re.get_named_subs()); + if(m_match_flags & match_posix) + m_result = *m_presult; + verify_options(re.flags(), m_match_flags); + if(0 == match_prefix()) + return false; + return (m_result[0].second == last) && (m_result[0].first == base); + +#if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)){} + throw; + } +#endif +} + +template +inline bool perl_matcher::find() +{ +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD + return protected_call(&perl_matcher::find_imp); +#else + return find_imp(); +#endif +} + +template +bool perl_matcher::find_imp() +{ + static matcher_proc_type const s_find_vtable[7] = + { + &perl_matcher::find_restart_any, + &perl_matcher::find_restart_word, + &perl_matcher::find_restart_line, + &perl_matcher::find_restart_buf, + &perl_matcher::match_prefix, + &perl_matcher::find_restart_lit, + &perl_matcher::find_restart_lit, + }; + + // initialise our stack if we are non-recursive: +#ifdef BOOST_REGEX_NON_RECURSIVE + save_state_init init(&m_stack_base, &m_backup_state); + used_block_count = BOOST_REGEX_MAX_BLOCKS; +#if !defined(BOOST_NO_EXCEPTIONS) + try{ +#endif +#endif + + state_count = 0; + if((m_match_flags & regex_constants::match_init) == 0) + { + // reset our state machine: + search_base = position = base; + pstate = re.get_first_state(); + m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), base, last); + m_presult->set_base(base); + m_presult->set_named_subs(this->re.get_named_subs()); + m_match_flags |= regex_constants::match_init; + } + else + { + // start again: + search_base = position = m_result[0].second; + // If last match was null and match_not_null was not set then increment + // our start position, otherwise we go into an infinite loop: + if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0)) + { + if(position == last) + return false; + else + ++position; + } + // reset $` start: + m_presult->set_size((m_match_flags & match_nosubs) ? 1 : re.mark_count(), search_base, last); + //if((base != search_base) && (base == backstop)) + // m_match_flags |= match_prev_avail; + } + if(m_match_flags & match_posix) + { + m_result.set_size(re.mark_count(), base, last); + m_result.set_base(base); + } + + verify_options(re.flags(), m_match_flags); + // find out what kind of expression we have: + unsigned type = (m_match_flags & match_continuous) ? + static_cast(regbase::restart_continue) + : static_cast(re.get_restart_type()); + + // call the appropriate search routine: + matcher_proc_type proc = s_find_vtable[type]; + return (this->*proc)(); + +#if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS) + } + catch(...) + { + // unwind all pushed states, apart from anything else this + // ensures that all the states are correctly destructed + // not just the memory freed. + while(unwind(true)){} + throw; + } +#endif +} + +template +bool perl_matcher::match_prefix() +{ + m_has_partial_match = false; + m_has_found_match = false; + pstate = re.get_first_state(); + m_presult->set_first(position); + restart = position; + match_all_states(); + if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial)) + { + m_has_found_match = true; + m_presult->set_second(last, 0, false); + position = last; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(m_has_found_match && (match_extra & m_match_flags)) + { + // + // we have a match, reverse the capture information: + // + for(unsigned i = 0; i < m_presult->size(); ++i) + { + typename sub_match::capture_sequence_type & seq = ((*m_presult)[i]).get_captures(); + std::reverse(seq.begin(), seq.end()); + } + } +#endif + if(!m_has_found_match) + position = restart; // reset search postion + return m_has_found_match; +} + +template +bool perl_matcher::match_literal() +{ + unsigned int len = static_cast(pstate)->length; + const char_type* what = reinterpret_cast(static_cast(pstate) + 1); + // + // compare string with what we stored in + // our records: + for(unsigned int i = 0; i < len; ++i, ++position) + { + if((position == last) || (traits_inst.translate(*position, icase) != what[i])) + return false; + } + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_start_line() +{ + if(position == backstop) + { + if((m_match_flags & match_prev_avail) == 0) + { + if((m_match_flags & match_not_bol) == 0) + { + pstate = pstate->next.p; + return true; + } + return false; + } + } + else if(m_match_flags & match_single_line) + return false; + + // check the previous value character: + BidiIterator t(position); + --t; + if(position != last) + { + if(is_separator(*t) && !((*t == static_cast('\r')) && (*position == static_cast('\n'))) ) + { + pstate = pstate->next.p; + return true; + } + } + else if(is_separator(*t)) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template +bool perl_matcher::match_end_line() +{ + if(position != last) + { + if(m_match_flags & match_single_line) + return false; + // we're not yet at the end so *first is always valid: + if(is_separator(*position)) + { + if((position != backstop) || (m_match_flags & match_prev_avail)) + { + // check that we're not in the middle of \r\n sequence + BidiIterator t(position); + --t; + if((*t == static_cast('\r')) && (*position == static_cast('\n'))) + { + return false; + } + } + pstate = pstate->next.p; + return true; + } + } + else if((m_match_flags & match_not_eol) == 0) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template +bool perl_matcher::match_wild() +{ + if(position == last) + return false; + if(is_separator(*position) && ((match_any_mask & static_cast(pstate)->mask) == 0)) + return false; + if((*position == char_type(0)) && (m_match_flags & match_not_dot_null)) + return false; + pstate = pstate->next.p; + ++position; + return true; +} + +template +bool perl_matcher::match_word_boundary() +{ + bool b; // indcates whether next character is a word character + if(position != last) + { + // prev and this character must be opposites: + #if defined(BOOST_REGEX_USE_C_LOCALE) && defined(__GNUC__) && (__GNUC__ == 2) && (__GNUC_MINOR__ < 95) + b = traits::isctype(*position, m_word_mask); + #else + b = traits_inst.isctype(*position, m_word_mask); + #endif + } + else + { + b = (m_match_flags & match_not_eow) ? true : false; + } + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + { + if(m_match_flags & match_not_bow) + b ^= true; + else + b ^= false; + } + else + { + --position; + b ^= traits_inst.isctype(*position, m_word_mask); + ++position; + } + if(b) + { + pstate = pstate->next.p; + return true; + } + return false; // no match if we get to here... +} + +template +bool perl_matcher::match_within_word() +{ + if(position == last) + return false; + // both prev and this character must be m_word_mask: + bool prev = traits_inst.isctype(*position, m_word_mask); + { + bool b; + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + return false; + else + { + --position; + b = traits_inst.isctype(*position, m_word_mask); + ++position; + } + if(b == prev) + { + pstate = pstate->next.p; + return true; + } + } + return false; +} + +template +bool perl_matcher::match_word_start() +{ + if(position == last) + return false; // can't be starting a word if we're already at the end of input + if(!traits_inst.isctype(*position, m_word_mask)) + return false; // next character isn't a word character + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + { + if(m_match_flags & match_not_bow) + return false; // no previous input + } + else + { + // otherwise inside buffer: + BidiIterator t(position); + --t; + if(traits_inst.isctype(*t, m_word_mask)) + return false; // previous character not non-word + } + // OK we have a match: + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_word_end() +{ + if((position == backstop) && ((m_match_flags & match_prev_avail) == 0)) + return false; // start of buffer can't be end of word + BidiIterator t(position); + --t; + if(traits_inst.isctype(*t, m_word_mask) == false) + return false; // previous character wasn't a word character + + if(position == last) + { + if(m_match_flags & match_not_eow) + return false; // end of buffer but not end of word + } + else + { + // otherwise inside buffer: + if(traits_inst.isctype(*position, m_word_mask)) + return false; // next character is a word character + } + pstate = pstate->next.p; + return true; // if we fall through to here then we've succeeded +} + +template +bool perl_matcher::match_buffer_start() +{ + if((position != backstop) || (m_match_flags & match_not_bob)) + return false; + // OK match: + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_buffer_end() +{ + if((position != last) || (m_match_flags & match_not_eob)) + return false; + // OK match: + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_backref() +{ + // + // Compare with what we previously matched. + // Note that this succeeds if the backref did not partisipate + // in the match, this is in line with ECMAScript, but not Perl + // or PCRE. + // + int index = static_cast(pstate)->index; + if(index >= 10000) + { + named_subexpressions::range_type r = re.get_data().equal_range(index); + BOOST_ASSERT(r.first != r.second); + do + { + index = r.first->index; + ++r.first; + }while((r.first != r.second) && ((*m_presult)[index].matched != true)); + } + + if((m_match_flags & match_perl) && !(*m_presult)[index].matched) + return false; + + BidiIterator i = (*m_presult)[index].first; + BidiIterator j = (*m_presult)[index].second; + while(i != j) + { + if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase))) + return false; + ++i; + ++position; + } + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_long_set() +{ + typedef typename traits::char_class_type char_class_type; + // let the traits class do the work: + if(position == last) + return false; + BidiIterator t = re_is_set_member(position, last, static_cast*>(pstate), re.get_data(), icase); + if(t != position) + { + pstate = pstate->next.p; + position = t; + return true; + } + return false; +} + +template +bool perl_matcher::match_set() +{ + if(position == last) + return false; + if(static_cast(pstate)->_map[static_cast(traits_inst.translate(*position, icase))]) + { + pstate = pstate->next.p; + ++position; + return true; + } + return false; +} + +template +bool perl_matcher::match_jump() +{ + pstate = static_cast(pstate)->alt.p; + return true; +} + +template +bool perl_matcher::match_combining() +{ + if(position == last) + return false; + if(is_combining(traits_inst.translate(*position, icase))) + return false; + ++position; + while((position != last) && is_combining(traits_inst.translate(*position, icase))) + ++position; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_soft_buffer_end() +{ + if(m_match_flags & match_not_eob) + return false; + BidiIterator p(position); + while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p; + if(p != last) + return false; + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_restart_continue() +{ + if(position == search_base) + { + pstate = pstate->next.p; + return true; + } + return false; +} + +template +bool perl_matcher::match_backstep() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + if( ::cutl_details_boost::is_random_access_iterator::value) + { + std::ptrdiff_t maxlen = ::cutl_details_boost::re_detail::distance(backstop, position); + if(maxlen < static_cast(pstate)->index) + return false; + std::advance(position, -static_cast(pstate)->index); + } + else + { + int c = static_cast(pstate)->index; + while(c--) + { + if(position == backstop) + return false; + --position; + } + } + pstate = pstate->next.p; + return true; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +inline bool perl_matcher::match_assert_backref() +{ + // return true if marked sub-expression N has been matched: + int index = static_cast(pstate)->index; + bool result = false; + if(index == 9999) + { + // Magic value for a (DEFINE) block: + return false; + } + else if(index > 0) + { + // Have we matched subexpression "index"? + // Check if index is a hash value: + if(index >= 10000) + { + named_subexpressions::range_type r = re.get_data().equal_range(index); + while(r.first != r.second) + { + if((*m_presult)[r.first->index].matched) + { + result = true; + break; + } + ++r.first; + } + } + else + { + result = (*m_presult)[index].matched; + } + pstate = pstate->next.p; + } + else + { + // Have we recursed into subexpression "index"? + // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1. + int idx = -index-1; + if(idx >= 10000) + { + named_subexpressions::range_type r = re.get_data().equal_range(idx); + int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx; + while(r.first != r.second) + { + result |= (stack_index == r.first->index); + if(result)break; + ++r.first; + } + } + else + { + result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0)); + } + pstate = pstate->next.p; + } + return result; +} + +template +bool perl_matcher::match_toggle_case() +{ + // change our case sensitivity: + this->icase = static_cast(pstate)->icase; + pstate = pstate->next.p; + return true; +} + + +template +bool perl_matcher::find_restart_any() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + const unsigned char* _map = re.get_map(); + while(true) + { + // skip everything we can't match: + while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) ) + ++position; + if(position == last) + { + // run out of characters, try a null match if possible: + if(re.can_be_null()) + return match_prefix(); + break; + } + // now try and obtain a match: + if(match_prefix()) + return true; + if(position == last) + return false; + ++position; + } + return false; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::find_restart_word() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + // do search optimised for word starts: + const unsigned char* _map = re.get_map(); + if((m_match_flags & match_prev_avail) || (position != base)) + --position; + else if(match_prefix()) + return true; + do + { + while((position != last) && traits_inst.isctype(*position, m_word_mask)) + ++position; + while((position != last) && !traits_inst.isctype(*position, m_word_mask)) + ++position; + if(position == last) + break; + + if(can_start(*position, _map, (unsigned char)mask_any) ) + { + if(match_prefix()) + return true; + } + if(position == last) + break; + } while(true); + return false; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::find_restart_line() +{ + // do search optimised for line starts: + const unsigned char* _map = re.get_map(); + if(match_prefix()) + return true; + while(position != last) + { + while((position != last) && !is_separator(*position)) + ++position; + if(position == last) + return false; + ++position; + if(position == last) + { + if(re.can_be_null() && match_prefix()) + return true; + return false; + } + + if( can_start(*position, _map, (unsigned char)mask_any) ) + { + if(match_prefix()) + return true; + } + if(position == last) + return false; + //++position; + } + return false; +} + +template +bool perl_matcher::find_restart_buf() +{ + if((position == base) && ((m_match_flags & match_not_bob) == 0)) + return match_prefix(); + return false; +} + +template +bool perl_matcher::find_restart_lit() +{ +#if 0 + if(position == last) + return false; // can't possibly match if we're at the end already + + unsigned type = (m_match_flags & match_continuous) ? + static_cast(regbase::restart_continue) + : static_cast(re.get_restart_type()); + + const kmp_info* info = access::get_kmp(re); + int len = info->len; + const char_type* x = info->pstr; + int j = 0; + while (position != last) + { + while((j > -1) && (x[j] != traits_inst.translate(*position, icase))) + j = info->kmp_next[j]; + ++position; + ++j; + if(j >= len) + { + if(type == regbase::restart_fixed_lit) + { + std::advance(position, -j); + restart = position; + std::advance(restart, len); + m_result.set_first(position); + m_result.set_second(restart); + position = restart; + return true; + } + else + { + restart = position; + std::advance(position, -j); + if(match_prefix()) + return true; + else + { + for(int k = 0; (restart != position) && (k < j); ++k, --restart) + {} // dwa 10/20/2000 - warning suppression for MWCW + if(restart != last) + ++restart; + position = restart; + j = 0; //we could do better than this... + } + } + } + } + if((m_match_flags & match_partial) && (position == last) && j) + { + // we need to check for a partial match: + restart = position; + std::advance(position, -j); + return match_prefix(); + } +#endif + return false; +} + +} // namespace re_detail + +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +#ifdef __BORLANDC__ +# pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/cutl/details/boost/regex/v4/perl_matcher_non_recursive.hpp b/cutl/details/boost/regex/v4/perl_matcher_non_recursive.hpp new file mode 100644 index 0000000..e4b1e1e --- /dev/null +++ b/cutl/details/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -0,0 +1,1639 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE perl_matcher_common.cpp + * VERSION see + * DESCRIPTION: Definitions of perl_matcher member functions that are + * specific to the non-recursive implementation. + */ + +#ifndef BOOST_REGEX_V4_PERL_MATCHER_NON_RECURSIVE_HPP +#define BOOST_REGEX_V4_PERL_MATCHER_NON_RECURSIVE_HPP + +#include + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +template +inline void inplace_destroy(T* p) +{ + (void)p; // warning suppression + p->~T(); +} + +struct saved_state +{ + union{ + unsigned int state_id; + // this padding ensures correct alignment on 64-bit platforms: + std::size_t padding1; + std::ptrdiff_t padding2; + void* padding3; + }; + saved_state(unsigned i) : state_id(i) {} +}; + +template +struct saved_matched_paren : public saved_state +{ + int index; + sub_match sub; + saved_matched_paren(int i, const sub_match& s) : saved_state(1), index(i), sub(s){}; +}; + +template +struct saved_position : public saved_state +{ + const re_syntax_base* pstate; + BidiIterator position; + saved_position(const re_syntax_base* ps, BidiIterator pos, int i) : saved_state(i), pstate(ps), position(pos){}; +}; + +template +struct saved_assertion : public saved_position +{ + bool positive; + saved_assertion(bool p, const re_syntax_base* ps, BidiIterator pos) + : saved_position(ps, pos, saved_type_assertion), positive(p){}; +}; + +template +struct saved_repeater : public saved_state +{ + repeater_count count; + saved_repeater(int i, repeater_count** s, BidiIterator start) + : saved_state(saved_state_repeater_count), count(i,s,start){} +}; + +struct saved_extra_block : public saved_state +{ + saved_state *base, *end; + saved_extra_block(saved_state* b, saved_state* e) + : saved_state(saved_state_extra_block), base(b), end(e) {} +}; + +struct save_state_init +{ + saved_state** stack; + save_state_init(saved_state** base, saved_state** end) + : stack(base) + { + *base = static_cast(get_mem_block()); + *end = reinterpret_cast(reinterpret_cast(*base)+BOOST_REGEX_BLOCKSIZE); + --(*end); + (void) new (*end)saved_state(0); + BOOST_ASSERT(*end > *base); + } + ~save_state_init() + { + put_mem_block(*stack); + *stack = 0; + } +}; + +template +struct saved_single_repeat : public saved_state +{ + std::size_t count; + const re_repeat* rep; + BidiIterator last_position; + saved_single_repeat(std::size_t c, const re_repeat* r, BidiIterator lp, int arg_id) + : saved_state(arg_id), count(c), rep(r), last_position(lp){} +}; + +template +struct saved_recursion : public saved_state +{ + saved_recursion(int idx, const re_syntax_base* p, Results* pr) + : saved_state(14), recursion_id(idx), preturn_address(p), results(*pr) + {} + int recursion_id; + const re_syntax_base* preturn_address; + Results results; +}; + +template +bool perl_matcher::match_all_states() +{ + static matcher_proc_type const s_match_vtable[30] = + { + (&perl_matcher::match_startmark), + &perl_matcher::match_endmark, + &perl_matcher::match_literal, + &perl_matcher::match_start_line, + &perl_matcher::match_end_line, + &perl_matcher::match_wild, + &perl_matcher::match_match, + &perl_matcher::match_word_boundary, + &perl_matcher::match_within_word, + &perl_matcher::match_word_start, + &perl_matcher::match_word_end, + &perl_matcher::match_buffer_start, + &perl_matcher::match_buffer_end, + &perl_matcher::match_backref, + &perl_matcher::match_long_set, + &perl_matcher::match_set, + &perl_matcher::match_jump, + &perl_matcher::match_alt, + &perl_matcher::match_rep, + &perl_matcher::match_combining, + &perl_matcher::match_soft_buffer_end, + &perl_matcher::match_restart_continue, + // Although this next line *should* be evaluated at compile time, in practice + // some compilers (VC++) emit run-time initialisation which breaks thread + // safety, so use a dispatch function instead: + //(::cutl_details_boost::is_random_access_iterator::value ? &perl_matcher::match_dot_repeat_fast : &perl_matcher::match_dot_repeat_slow), + &perl_matcher::match_dot_repeat_dispatch, + &perl_matcher::match_char_repeat, + &perl_matcher::match_set_repeat, + &perl_matcher::match_long_set_repeat, + &perl_matcher::match_backstep, + &perl_matcher::match_assert_backref, + &perl_matcher::match_toggle_case, + &perl_matcher::match_recursion, + }; + + push_recursion_stopper(); + do{ + while(pstate) + { + matcher_proc_type proc = s_match_vtable[pstate->type]; + ++state_count; + if(!(this->*proc)()) + { + if(state_count > max_state_count) + raise_error(traits_inst, regex_constants::error_complexity); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + bool successful_unwind = unwind(false); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(false == successful_unwind) + return m_recursive_result; + } + } + }while(unwind(true)); + return m_recursive_result; +} + +template +void perl_matcher::extend_stack() +{ + if(used_block_count) + { + --used_block_count; + saved_state* stack_base; + saved_state* backup_state; + stack_base = static_cast(get_mem_block()); + backup_state = reinterpret_cast(reinterpret_cast(stack_base)+BOOST_REGEX_BLOCKSIZE); + saved_extra_block* block = static_cast(backup_state); + --block; + (void) new (block) saved_extra_block(m_stack_base, m_backup_state); + m_stack_base = stack_base; + m_backup_state = block; + } + else + raise_error(traits_inst, regex_constants::error_stack); +} + +template +inline void perl_matcher::push_matched_paren(int index, const sub_match& sub) +{ + //BOOST_ASSERT(index); + saved_matched_paren* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_matched_paren(index, sub); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_recursion_stopper() +{ + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(saved_type_recurse); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_assertion(const re_syntax_base* ps, bool positive) +{ + saved_assertion* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_assertion(positive, ps, position); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_alt(const re_syntax_base* ps) +{ + saved_position* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position(ps, position, saved_state_alt); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_non_greedy_repeat(const re_syntax_base* ps) +{ + saved_position* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position(ps, position, saved_state_non_greedy_long_repeat); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_repeater_count(int i, repeater_count** s) +{ + saved_repeater* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_repeater(i, s, position); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_single_repeat(c, r, last_position, state_id); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_recursion(int idx, const re_syntax_base* p, results_type* presults) +{ + saved_recursion* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_recursion(idx, p, presults); + m_backup_state = pmp; +} + +template +bool perl_matcher::match_startmark() +{ + int index = static_cast(pstate)->index; + icase = static_cast(pstate)->icase; + switch(index) + { + case 0: + pstate = pstate->next.p; + break; + case -1: + case -2: + { + // forward lookahead assert: + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + push_assertion(next_pstate, index == -1); + break; + } + case -3: + { + // independent sub-expression, currently this is always recursive: + bool old_independent = m_independent; + m_independent = true; + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + bool r = match_all_states(); + pstate = next_pstate; + m_independent = old_independent; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(r && (m_match_flags & match_extra)) + { + // + // our captures have been stored in *m_presult + // we need to unpack them, and insert them + // back in the right order when we unwind the stack: + // + match_results temp_match(*m_presult); + unsigned i; + for(i = 0; i < temp_match.size(); ++i) + (*m_presult)[i].get_captures().clear(); + // match everything else: + r = match_all_states(); + // now place the stored captures back: + for(i = 0; i < temp_match.size(); ++i) + { + typedef typename sub_match::capture_sequence_type seq; + seq& s1 = (*m_presult)[i].get_captures(); + const seq& s2 = temp_match[i].captures(); + s1.insert( + s1.end(), + s2.begin(), + s2.end()); + } + } +#endif + return r; + } + case -4: + { + // conditional expression: + const re_alt* alt = static_cast(pstate->next.p); + BOOST_ASSERT(alt->type == syntax_element_alt); + pstate = alt->next.p; + if(pstate->type == syntax_element_assert_backref) + { + if(!match_assert_backref()) + pstate = alt->alt.p; + break; + } + else + { + // zero width assertion, have to match this recursively: + BOOST_ASSERT(pstate->type == syntax_element_startmark); + bool negated = static_cast(pstate)->index == -2; + BidiIterator saved_position = position; + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + bool r = match_all_states(); + position = saved_position; + if(negated) + r = !r; + if(r) + pstate = next_pstate; + else + pstate = alt->alt.p; + break; + } + } + case -5: + { + push_matched_paren(0, (*m_presult)[0]); + m_presult->set_first(position, 0, true); + pstate = pstate->next.p; + break; + } + default: + { + BOOST_ASSERT(index > 0); + if((m_match_flags & match_nosubs) == 0) + { + push_matched_paren(index, (*m_presult)[index]); + m_presult->set_first(position, index); + } + pstate = pstate->next.p; + break; + } + } + return true; +} + +template +bool perl_matcher::match_alt() +{ + bool take_first, take_second; + const re_alt* jmp = static_cast(pstate); + + // find out which of these two alternatives we need to take: + if(position == last) + { + take_first = jmp->can_be_null & mask_take; + take_second = jmp->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, jmp->_map, (unsigned char)mask_take); + take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip); + } + + if(take_first) + { + // we can take the first alternative, + // see if we need to push next alternative: + if(take_second) + { + push_alt(jmp->alt.p); + } + pstate = pstate->next.p; + return true; + } + if(take_second) + { + pstate = jmp->alt.p; + return true; + } + return false; // neither option is possible +} + +template +bool perl_matcher::match_rep() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127 4244) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + + // find out which of these two alternatives we need to take: + bool take_first, take_second; + if(position == last) + { + take_first = rep->can_be_null & mask_take; + take_second = rep->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, rep->_map, (unsigned char)mask_take); + take_second = can_start(*position, rep->_map, (unsigned char)mask_skip); + } + + if((m_backup_state->state_id != saved_state_repeater_count) + || (static_cast*>(m_backup_state)->count.get_id() != rep->state_id) + || (next_count->get_id() != rep->state_id)) + { + // we're moving to a different repeat from the last + // one, so set up a counter object: + push_repeater_count(rep->state_id, &next_count); + } + // + // If we've had at least one repeat already, and the last one + // matched the NULL string then set the repeat count to + // maximum: + // + next_count->check_null_repeat(position, rep->max); + + if(next_count->get_count() < rep->min) + { + // we must take the repeat: + if(take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + return false; + } + + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // try and take the repeat if we can: + if((next_count->get_count() < rep->max) && take_first) + { + if(take_second) + { + // store position in case we fail: + push_alt(rep->alt.p); + } + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + else if(take_second) + { + pstate = rep->alt.p; + return true; + } + return false; // can't take anything, fail... + } + else // non-greedy + { + // try and skip the repeat if we can: + if(take_second) + { + if((next_count->get_count() < rep->max) && take_first) + { + // store position in case we fail: + push_non_greedy_repeat(rep->next.p); + } + pstate = rep->alt.p; + return true; + } + if((next_count->get_count() < rep->max) && take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + } + return false; +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_dot_repeat_slow() +{ + unsigned count = 0; + const re_repeat* rep = static_cast(pstate); + re_syntax_base* psingle = rep->next.p; + // match compulsary repeats first: + while(count < rep->min) + { + pstate = psingle; + if(!match_wild()) + return false; + ++count; + } + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // repeat for as long as we can: + while(count < rep->max) + { + pstate = psingle; + if(!match_wild()) + break; + ++count; + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_slow_dot); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +} + +template +bool perl_matcher::match_dot_repeat_fast() +{ + if(m_match_flags & match_not_dot_null) + return match_dot_repeat_slow(); + if((static_cast(pstate->next.p)->mask & match_any_mask) == 0) + return match_dot_repeat_slow(); + + const re_repeat* rep = static_cast(pstate); + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + unsigned count = static_cast((std::min)(static_cast(::cutl_details_boost::re_detail::distance(position, last)), static_cast(greedy ? rep->max : rep->min))); + if(rep->min > count) + { + position = last; + return false; // not enough text left to match + } + std::advance(position, count); + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_fast_dot); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +} + +template +bool perl_matcher::match_char_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + BOOST_ASSERT(1 == static_cast(rep->next.p)->length); + const char_type what = *reinterpret_cast(static_cast(rep->next.p) + 1); + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::cutl_details_boost::is_random_access_iterator::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::cutl_details_boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + } + count = (unsigned)::cutl_details_boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_char); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_set_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + const unsigned char* map = static_cast(rep->next.p)->_map; + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::cutl_details_boost::is_random_access_iterator::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::cutl_details_boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + } + count = (unsigned)::cutl_details_boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_short_set); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_long_set_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + typedef typename traits::char_class_type mask_type; + const re_repeat* rep = static_cast(pstate); + const re_set_long* set = static_cast*>(pstate->next.p); + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::cutl_details_boost::is_random_access_iterator::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::cutl_details_boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + } + count = (unsigned)::cutl_details_boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_long_set); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_recursion() +{ + BOOST_ASSERT(pstate->type == syntax_element_recurse); + // + // Backup call stack: + // + push_recursion_pop(); + // + // Set new call stack: + // + if(recursion_stack.capacity() == 0) + { + recursion_stack.reserve(50); + } + recursion_stack.push_back(recursion_info()); + recursion_stack.back().preturn_address = pstate->next.p; + recursion_stack.back().results = *m_presult; + if(static_cast(pstate)->state_id > 0) + { + push_repeater_count(static_cast(pstate)->state_id, &next_count); + } + pstate = static_cast(pstate)->alt.p; + recursion_stack.back().idx = static_cast(pstate)->index; + + return true; +} + +template +bool perl_matcher::match_endmark() +{ + int index = static_cast(pstate)->index; + icase = static_cast(pstate)->icase; + if(index > 0) + { + if((m_match_flags & match_nosubs) == 0) + { + m_presult->set_second(position, index); + } + if(!recursion_stack.empty()) + { + if(index == recursion_stack.back().idx) + { + pstate = recursion_stack.back().preturn_address; + *m_presult = recursion_stack.back().results; + push_recursion(recursion_stack.back().idx, recursion_stack.back().preturn_address, &recursion_stack.back().results); + recursion_stack.pop_back(); + } + } + } + else if((index < 0) && (index != -4)) + { + // matched forward lookahead: + pstate = 0; + return true; + } + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_match() +{ + if(!recursion_stack.empty()) + { + BOOST_ASSERT(0 == recursion_stack.back().idx); + pstate = recursion_stack.back().preturn_address; + *m_presult = recursion_stack.back().results; + push_recursion(recursion_stack.back().idx, recursion_stack.back().preturn_address, &recursion_stack.back().results); + recursion_stack.pop_back(); + return true; + } + if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) + return false; + if((m_match_flags & match_all) && (position != last)) + return false; + if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) + return false; + m_presult->set_second(position); + pstate = 0; + m_has_found_match = true; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + if((m_match_flags & match_any) == 0) + return false; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(match_extra & m_match_flags) + { + for(unsigned i = 0; i < m_presult->size(); ++i) + if((*m_presult)[i].matched) + ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); + } +#endif + return true; +} + +/**************************************************************************** + +Unwind and associated proceedures follow, these perform what normal stack +unwinding does in the recursive implementation. + +****************************************************************************/ + +template +bool perl_matcher::unwind(bool have_match) +{ + static unwind_proc_type const s_unwind_table[18] = + { + &perl_matcher::unwind_end, + &perl_matcher::unwind_paren, + &perl_matcher::unwind_recursion_stopper, + &perl_matcher::unwind_assertion, + &perl_matcher::unwind_alt, + &perl_matcher::unwind_repeater_counter, + &perl_matcher::unwind_extra_block, + &perl_matcher::unwind_greedy_single_repeat, + &perl_matcher::unwind_slow_dot_repeat, + &perl_matcher::unwind_fast_dot_repeat, + &perl_matcher::unwind_char_repeat, + &perl_matcher::unwind_short_set_repeat, + &perl_matcher::unwind_long_set_repeat, + &perl_matcher::unwind_non_greedy_repeat, + &perl_matcher::unwind_recursion, + &perl_matcher::unwind_recursion_pop, + }; + + m_recursive_result = have_match; + unwind_proc_type unwinder; + bool cont; + // + // keep unwinding our stack until we have something to do: + // + do + { + unwinder = s_unwind_table[m_backup_state->state_id]; + cont = (this->*unwinder)(m_recursive_result); + }while(cont); + // + // return true if we have more states to try: + // + return pstate ? true : false; +} + +template +bool perl_matcher::unwind_end(bool) +{ + pstate = 0; // nothing left to search + return false; // end of stack nothing more to search +} + +template +bool perl_matcher::unwind_paren(bool have_match) +{ + saved_matched_paren* pmp = static_cast*>(m_backup_state); + // restore previous values if no match was found: + if(have_match == false) + { + m_presult->set_first(pmp->sub.first, pmp->index, pmp->index == 0); + m_presult->set_second(pmp->sub.second, pmp->index, pmp->sub.matched, pmp->index == 0); + } +#ifdef BOOST_REGEX_MATCH_EXTRA + // + // we have a match, push the capture information onto the stack: + // + else if(pmp->sub.matched && (match_extra & m_match_flags)) + ((*m_presult)[pmp->index]).get_captures().push_back(pmp->sub); +#endif + // unwind stack: + m_backup_state = pmp+1; + cutl_details_boost::re_detail::inplace_destroy(pmp); + return true; // keep looking +} + +template +bool perl_matcher::unwind_recursion_stopper(bool) +{ + cutl_details_boost::re_detail::inplace_destroy(m_backup_state++); + pstate = 0; // nothing left to search + return false; // end of stack nothing more to search +} + +template +bool perl_matcher::unwind_assertion(bool r) +{ + saved_assertion* pmp = static_cast*>(m_backup_state); + pstate = pmp->pstate; + position = pmp->position; + bool result = (r == pmp->positive); + m_recursive_result = pmp->positive ? r : !r; + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return !result; // return false if the assertion was matched to stop search. +} + +template +bool perl_matcher::unwind_alt(bool r) +{ + saved_position* pmp = static_cast*>(m_backup_state); + if(!r) + { + pstate = pmp->pstate; + position = pmp->position; + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return r; +} + +template +bool perl_matcher::unwind_repeater_counter(bool) +{ + saved_repeater* pmp = static_cast*>(m_backup_state); + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; // keep looking +} + +template +bool perl_matcher::unwind_extra_block(bool) +{ + saved_extra_block* pmp = static_cast(m_backup_state); + void* condemmed = m_stack_base; + m_stack_base = pmp->base; + m_backup_state = pmp->end; + cutl_details_boost::re_detail::inplace_destroy(pmp); + put_mem_block(condemmed); + return true; // keep looking +} + +template +inline void perl_matcher::destroy_single_repeat() +{ + saved_single_repeat* p = static_cast*>(m_backup_state); + cutl_details_boost::re_detail::inplace_destroy(p++); + m_backup_state = p; +} + +template +bool perl_matcher::unwind_greedy_single_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + + count -= rep->min; + + if((m_match_flags & match_partial) && (position == last)) + m_has_partial_match = true; + + BOOST_ASSERT(count); + position = pmp->last_position; + + // backtrack till we can skip out: + do + { + --position; + --count; + ++state_count; + }while(count && !can_start(*position, rep->_map, mask_skip)); + + // if we've hit base, destroy this state: + if(count == 0) + { + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count + rep->min; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_slow_dot_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + BOOST_ASSERT(rep->type == syntax_element_dot_rep); + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + BOOST_ASSERT(rep->next.p->type == syntax_element_wild); + + BOOST_ASSERT(count < rep->max); + pstate = rep->next.p; + position = pmp->last_position; + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(!match_wild()) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_fast_dot_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + + BOOST_ASSERT(count < rep->max); + position = pmp->last_position; + if(position != last) + { + + // wind forward until we can skip out of the repeat: + do + { + ++position; + ++count; + ++state_count; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_char_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const char_type what = *reinterpret_cast(static_cast(pstate) + 1); + position = pmp->last_position; + + BOOST_ASSERT(rep->type == syntax_element_char_rep); + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + BOOST_ASSERT(rep->next.p->type == syntax_element_literal); + BOOST_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(traits_inst.translate(*position, icase) != what) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++ position; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_short_set_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const unsigned char* map = static_cast(rep->next.p)->_map; + position = pmp->last_position; + + BOOST_ASSERT(rep->type == syntax_element_short_set_rep); + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + BOOST_ASSERT(rep->next.p->type == syntax_element_set); + BOOST_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(!map[static_cast(traits_inst.translate(*position, icase))]) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++ position; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_long_set_repeat(bool r) +{ + typedef typename traits::char_class_type mask_type; + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const re_set_long* set = static_cast*>(pstate); + position = pmp->last_position; + + BOOST_ASSERT(rep->type == syntax_element_long_set_rep); + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + BOOST_ASSERT(rep->next.p->type == syntax_element_long_set); + BOOST_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(position == re_is_set_member(position, last, set, re.get_data(), icase)) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++position; + ++count; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_non_greedy_repeat(bool r) +{ + saved_position* pmp = static_cast*>(m_backup_state); + if(!r) + { + position = pmp->position; + pstate = pmp->pstate; + ++(*next_count); + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return r; +} + +template +bool perl_matcher::unwind_recursion(bool r) +{ + saved_recursion* pmp = static_cast*>(m_backup_state); + if(!r) + { + recursion_stack.push_back(recursion_info()); + recursion_stack.back().idx = pmp->recursion_id; + recursion_stack.back().preturn_address = pmp->preturn_address; + recursion_stack.back().results = pmp->results; + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +bool perl_matcher::unwind_recursion_pop(bool r) +{ + saved_state* pmp = static_cast(m_backup_state); + if(!r) + { + recursion_stack.pop_back(); + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +void perl_matcher::push_recursion_pop() +{ + saved_state* pmp = static_cast(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast(m_backup_state); + --pmp; + } + (void) new (pmp)saved_state(15); + m_backup_state = pmp; +} +/* +template +bool perl_matcher::unwind_parenthesis_pop(bool r) +{ + saved_state* pmp = static_cast(m_backup_state); + if(!r) + { + --parenthesis_stack_position; + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +void perl_matcher::push_parenthesis_pop() +{ + saved_state* pmp = static_cast(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast(m_backup_state); + --pmp; + } + (void) new (pmp)saved_state(16); + m_backup_state = pmp; +} + +template +bool perl_matcher::unwind_parenthesis_push(bool r) +{ + saved_position* pmp = static_cast*>(m_backup_state); + if(!r) + { + parenthesis_stack[parenthesis_stack_position++] = pmp->position; + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +inline void perl_matcher::push_parenthesis_push(BidiIterator p) +{ + saved_position* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position(0, p, 17); + m_backup_state = pmp; +} +*/ +} // namespace re_detail +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + + diff --git a/cutl/details/boost/regex/v4/perl_matcher_recursive.hpp b/cutl/details/boost/regex/v4/perl_matcher_recursive.hpp new file mode 100644 index 0000000..6b4492c --- /dev/null +++ b/cutl/details/boost/regex/v4/perl_matcher_recursive.hpp @@ -0,0 +1,991 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE perl_matcher_common.cpp + * VERSION see + * DESCRIPTION: Definitions of perl_matcher member functions that are + * specific to the recursive implementation. + */ + +#ifndef BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP +#define BOOST_REGEX_V4_PERL_MATCHER_RECURSIVE_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4800) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +template +class backup_subex +{ + int index; + sub_match sub; +public: + template + backup_subex(const match_results& w, int i) + : index(i), sub(w[i], false) {} + template + void restore(match_results& w) + { + w.set_first(sub.first, index, index == 0); + w.set_second(sub.second, index, sub.matched, index == 0); + } + const sub_match& get() { return sub; } +}; + +template +bool perl_matcher::match_all_states() +{ + static matcher_proc_type const s_match_vtable[30] = + { + (&perl_matcher::match_startmark), + &perl_matcher::match_endmark, + &perl_matcher::match_literal, + &perl_matcher::match_start_line, + &perl_matcher::match_end_line, + &perl_matcher::match_wild, + &perl_matcher::match_match, + &perl_matcher::match_word_boundary, + &perl_matcher::match_within_word, + &perl_matcher::match_word_start, + &perl_matcher::match_word_end, + &perl_matcher::match_buffer_start, + &perl_matcher::match_buffer_end, + &perl_matcher::match_backref, + &perl_matcher::match_long_set, + &perl_matcher::match_set, + &perl_matcher::match_jump, + &perl_matcher::match_alt, + &perl_matcher::match_rep, + &perl_matcher::match_combining, + &perl_matcher::match_soft_buffer_end, + &perl_matcher::match_restart_continue, + // Although this next line *should* be evaluated at compile time, in practice + // some compilers (VC++) emit run-time initialisation which breaks thread + // safety, so use a dispatch function instead: + //(::cutl_details_boost::is_random_access_iterator::value ? &perl_matcher::match_dot_repeat_fast : &perl_matcher::match_dot_repeat_slow), + &perl_matcher::match_dot_repeat_dispatch, + &perl_matcher::match_char_repeat, + &perl_matcher::match_set_repeat, + &perl_matcher::match_long_set_repeat, + &perl_matcher::match_backstep, + &perl_matcher::match_assert_backref, + &perl_matcher::match_toggle_case, + &perl_matcher::match_recursion, + }; + + if(state_count > max_state_count) + raise_error(traits_inst, regex_constants::error_complexity); + while(pstate) + { + matcher_proc_type proc = s_match_vtable[pstate->type]; + ++state_count; + if(!(this->*proc)()) + { + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + return 0; + } + } + return true; +} + +template +bool perl_matcher::match_startmark() +{ + int index = static_cast(pstate)->index; + icase = static_cast(pstate)->icase; + bool r = true; + switch(index) + { + case 0: + pstate = pstate->next.p; + break; + case -1: + case -2: + { + // forward lookahead assert: + BidiIterator old_position(position); + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + r = match_all_states(); + pstate = next_pstate; + position = old_position; + if((r && (index != -1)) || (!r && (index != -2))) + r = false; + else + r = true; + break; + } + case -3: + { + // independent sub-expression: + bool old_independent = m_independent; + m_independent = true; + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + r = match_all_states(); + pstate = next_pstate; + m_independent = old_independent; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(r && (m_match_flags & match_extra)) + { + // + // our captures have been stored in *m_presult + // we need to unpack them, and insert them + // back in the right order when we unwind the stack: + // + unsigned i; + match_results tm(*m_presult); + for(i = 0; i < tm.size(); ++i) + (*m_presult)[i].get_captures().clear(); + // match everything else: + r = match_all_states(); + // now place the stored captures back: + for(i = 0; i < tm.size(); ++i) + { + typedef typename sub_match::capture_sequence_type seq; + seq& s1 = (*m_presult)[i].get_captures(); + const seq& s2 = tm[i].captures(); + s1.insert( + s1.end(), + s2.begin(), + s2.end()); + } + } +#endif + break; + } + case -4: + { + // conditional expression: + const re_alt* alt = static_cast(pstate->next.p); + BOOST_ASSERT(alt->type == syntax_element_alt); + pstate = alt->next.p; + if(pstate->type == syntax_element_assert_backref) + { + if(!match_assert_backref()) + pstate = alt->alt.p; + break; + } + else + { + // zero width assertion, have to match this recursively: + BOOST_ASSERT(pstate->type == syntax_element_startmark); + bool negated = static_cast(pstate)->index == -2; + BidiIterator saved_position = position; + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + bool res = match_all_states(); + position = saved_position; + if(negated) + res = !res; + if(res) + pstate = next_pstate; + else + pstate = alt->alt.p; + break; + } + } + case -5: + { + // Reset start of $0, since we have a \K escape + backup_subex sub(*m_presult, 0); + m_presult->set_first(position, 0, true); + pstate = pstate->next.p; + r = match_all_states(); + if(r == false) + sub.restore(*m_presult); + break; + } + default: + { + BOOST_ASSERT(index > 0); + if((m_match_flags & match_nosubs) == 0) + { + backup_subex sub(*m_presult, index); + m_presult->set_first(position, index); + pstate = pstate->next.p; + r = match_all_states(); + if(r == false) + sub.restore(*m_presult); +#ifdef BOOST_REGEX_MATCH_EXTRA + // + // we have a match, push the capture information onto the stack: + // + else if(sub.get().matched && (match_extra & m_match_flags)) + ((*m_presult)[index]).get_captures().push_back(sub.get()); +#endif + } + else + { + pstate = pstate->next.p; + } + break; + } + } + return r; +} + +template +bool perl_matcher::match_alt() +{ + bool take_first, take_second; + const re_alt* jmp = static_cast(pstate); + + // find out which of these two alternatives we need to take: + if(position == last) + { + take_first = jmp->can_be_null & mask_take; + take_second = jmp->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, jmp->_map, (unsigned char)mask_take); + take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip); + } + + if(take_first) + { + // we can take the first alternative, + // see if we need to push next alternative: + if(take_second) + { + BidiIterator oldposition(position); + const re_syntax_base* old_pstate = jmp->alt.p; + pstate = pstate->next.p; + if(!match_all_states()) + { + pstate = old_pstate; + position = oldposition; + } + return true; + } + pstate = pstate->next.p; + return true; + } + if(take_second) + { + pstate = jmp->alt.p; + return true; + } + return false; // neither option is possible +} + +template +bool perl_matcher::match_rep() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127 4244) +#endif + const re_repeat* rep = static_cast(pstate); + // + // Always copy the repeat count, so that the state is restored + // when we exit this scope: + // + repeater_count r(rep->state_id, &next_count, position); + // + // If we've had at least one repeat already, and the last one + // matched the NULL string then set the repeat count to + // maximum: + // + next_count->check_null_repeat(position, rep->max); + + // find out which of these two alternatives we need to take: + bool take_first, take_second; + if(position == last) + { + take_first = rep->can_be_null & mask_take; + take_second = rep->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, rep->_map, (unsigned char)mask_take); + take_second = can_start(*position, rep->_map, (unsigned char)mask_skip); + } + + if(next_count->get_count() < rep->min) + { + // we must take the repeat: + if(take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return match_all_states(); + } + return false; + } + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // try and take the repeat if we can: + if((next_count->get_count() < rep->max) && take_first) + { + // store position in case we fail: + BidiIterator pos = position; + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + if(match_all_states()) + return true; + // failed repeat, reset posistion and fall through for alternative: + position = pos; + } + if(take_second) + { + pstate = rep->alt.p; + return true; + } + return false; // can't take anything, fail... + } + else // non-greedy + { + // try and skip the repeat if we can: + if(take_second) + { + // store position in case we fail: + BidiIterator pos = position; + pstate = rep->alt.p; + if(match_all_states()) + return true; + // failed alternative, reset posistion and fall through for repeat: + position = pos; + } + if((next_count->get_count() < rep->max) && take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return match_all_states(); + } + } + return false; +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_dot_repeat_slow() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + unsigned count = 0; + const re_repeat* rep = static_cast(pstate); + re_syntax_base* psingle = rep->next.p; + // match compulsary repeats first: + while(count < rep->min) + { + pstate = psingle; + if(!match_wild()) + return false; + ++count; + } + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // normal repeat: + while(count < rep->max) + { + pstate = psingle; + if(!match_wild()) + break; + ++count; + } + if((rep->leading) && (count < rep->max)) + restart = position; + pstate = rep; + return backtrack_till_match(count - rep->min); + } + else + { + // non-greedy, keep trying till we get a match: + BidiIterator save_pos; + do + { + if((rep->leading) && (rep->max == UINT_MAX)) + restart = position; + pstate = rep->alt.p; + save_pos = position; + ++state_count; + if(match_all_states()) + return true; + if(count >= rep->max) + return false; + ++count; + pstate = psingle; + position = save_pos; + if(!match_wild()) + return false; + }while(true); + } +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_dot_repeat_fast() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + if(m_match_flags & match_not_dot_null) + return match_dot_repeat_slow(); + if((static_cast(pstate->next.p)->mask & match_any_mask) == 0) + return match_dot_repeat_slow(); + // + // start by working out how much we can skip: + // + const re_repeat* rep = static_cast(pstate); +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4267) +#endif + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t count = (std::min)(static_cast(::cutl_details_boost::re_detail::distance(position, last)), static_cast(greedy ? rep->max : rep->min)); + if(rep->min > count) + { + position = last; + return false; // not enough text left to match + } + std::advance(position, count); +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + if((rep->leading) && (count < rep->max) && greedy) + restart = position; + if(greedy) + return backtrack_till_match(count - rep->min); + + // non-greedy, keep trying till we get a match: + BidiIterator save_pos; + do + { + while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) + { + ++position; + ++count; + } + if((rep->leading) && (rep->max == UINT_MAX)) + restart = position; + pstate = rep->alt.p; + save_pos = position; + ++state_count; + if(match_all_states()) + return true; + if(count >= rep->max) + return false; + if(save_pos == last) + return false; + position = ++save_pos; + ++count; + }while(true); +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_char_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#pragma warning(disable:4267) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + BOOST_ASSERT(1 == static_cast(rep->next.p)->length); + const char_type what = *reinterpret_cast(static_cast(rep->next.p) + 1); + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t count, desired; + if(::cutl_details_boost::is_random_access_iterator::value) + { + desired = + (std::min)( + (std::size_t)(greedy ? rep->max : rep->min), + (std::size_t)::cutl_details_boost::re_detail::distance(position, last)); + count = desired; + ++desired; + if(icase) + { + while(--desired && (traits_inst.translate_nocase(*position) == what)) + { + ++position; + } + } + else + { + while(--desired && (traits_inst.translate(*position) == what)) + { + ++position; + } + } + count = count - desired; + } + else + { + count = 0; + desired = greedy ? rep->max : rep->min; + while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + ++count; + } + } + if((rep->leading) && (count < rep->max) && greedy) + restart = position; + if(count < rep->min) + return false; + + if(greedy) + return backtrack_till_match(count - rep->min); + + // non-greedy, keep trying till we get a match: + BidiIterator save_pos; + do + { + while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) + { + if((traits_inst.translate(*position, icase) == what)) + { + ++position; + ++count; + } + else + return false; // counldn't repeat even though it was the only option + } + if((rep->leading) && (rep->max == UINT_MAX)) + restart = position; + pstate = rep->alt.p; + save_pos = position; + ++state_count; + if(match_all_states()) + return true; + if(count >= rep->max) + return false; + position = save_pos; + if(position == last) + return false; + if(traits_inst.translate(*position, icase) == what) + { + ++position; + ++count; + } + else + { + return false; + } + }while(true); +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_set_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + const unsigned char* map = static_cast(rep->next.p)->_map; + unsigned count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::cutl_details_boost::is_random_access_iterator::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::cutl_details_boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + } + count = (unsigned)::cutl_details_boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + } + if((rep->leading) && (count < rep->max) && greedy) + restart = position; + if(count < rep->min) + return false; + + if(greedy) + return backtrack_till_match(count - rep->min); + + // non-greedy, keep trying till we get a match: + BidiIterator save_pos; + do + { + while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) + { + if(map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + else + return false; // counldn't repeat even though it was the only option + } + if((rep->leading) && (rep->max == UINT_MAX)) + restart = position; + pstate = rep->alt.p; + save_pos = position; + ++state_count; + if(match_all_states()) + return true; + if(count >= rep->max) + return false; + position = save_pos; + if(position == last) + return false; + if(map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + else + { + return false; + } + }while(true); +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_long_set_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + typedef typename traits::char_class_type char_class_type; + const re_repeat* rep = static_cast(pstate); + const re_set_long* set = static_cast*>(pstate->next.p); + unsigned count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::cutl_details_boost::is_random_access_iterator::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::cutl_details_boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + } + count = (unsigned)::cutl_details_boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + ++count; + } + } + if((rep->leading) && (count < rep->max) && greedy) + restart = position; + if(count < rep->min) + return false; + + if(greedy) + return backtrack_till_match(count - rep->min); + + // non-greedy, keep trying till we get a match: + BidiIterator save_pos; + do + { + while((position != last) && (count < rep->max) && !can_start(*position, rep->_map, mask_skip)) + { + if(position != re_is_set_member(position, last, set, re.get_data(), icase)) + { + ++position; + ++count; + } + else + return false; // counldn't repeat even though it was the only option + } + if((rep->leading) && (rep->max == UINT_MAX)) + restart = position; + pstate = rep->alt.p; + save_pos = position; + ++state_count; + if(match_all_states()) + return true; + if(count >= rep->max) + return false; + position = save_pos; + if(position == last) + return false; + if(position != re_is_set_member(position, last, set, re.get_data(), icase)) + { + ++position; + ++count; + } + else + { + return false; + } + }while(true); +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::backtrack_till_match(std::size_t count) +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif + if((m_match_flags & match_partial) && (position == last)) + m_has_partial_match = true; + + const re_repeat* rep = static_cast(pstate); + BidiIterator backtrack = position; + if(position == last) + { + if(rep->can_be_null & mask_skip) + { + pstate = rep->alt.p; + if(match_all_states()) + return true; + } + if(count) + { + position = --backtrack; + --count; + } + else + return false; + } + do + { + while(count && !can_start(*position, rep->_map, mask_skip)) + { + --position; + --count; + ++state_count; + } + pstate = rep->alt.p; + backtrack = position; + if(match_all_states()) + return true; + if(count == 0) + return false; + position = --backtrack; + ++state_count; + --count; + }while(true); +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_recursion() +{ + BOOST_ASSERT(pstate->type == syntax_element_recurse); + // + // Set new call stack: + // + if(recursion_stack.capacity() == 0) + { + recursion_stack.reserve(50); + } + recursion_stack.push_back(recursion_info()); + recursion_stack.back().preturn_address = pstate->next.p; + recursion_stack.back().results = *m_presult; + recursion_stack.back().repeater_stack = next_count; + pstate = static_cast(pstate)->alt.p; + recursion_stack.back().idx = static_cast(pstate)->index; + + repeater_count* saved = next_count; + repeater_count r(&next_count); // resets all repeat counts since we're recursing and starting fresh on those + next_count = &r; + bool result = match_all_states(); + next_count = saved; + + if(!result) + { + next_count = recursion_stack.back().repeater_stack; + *m_presult = recursion_stack.back().results; + recursion_stack.pop_back(); + return false; + } + return true; +} + +template +bool perl_matcher::match_endmark() +{ + int index = static_cast(pstate)->index; + icase = static_cast(pstate)->icase; + if(index > 0) + { + if((m_match_flags & match_nosubs) == 0) + { + m_presult->set_second(position, index); + } + if(!recursion_stack.empty()) + { + if(index == recursion_stack.back().idx) + { + recursion_info saved = recursion_stack.back(); + recursion_stack.pop_back(); + pstate = saved.preturn_address; + repeater_count* saved_count = next_count; + next_count = saved.repeater_stack; + *m_presult = saved.results; + if(!match_all_states()) + { + recursion_stack.push_back(saved); + next_count = saved_count; + return false; + } + } + } + } + else if((index < 0) && (index != -4)) + { + // matched forward lookahead: + pstate = 0; + return true; + } + pstate = pstate ? pstate->next.p : 0; + return true; +} + +template +bool perl_matcher::match_match() +{ + if(!recursion_stack.empty()) + { + BOOST_ASSERT(0 == recursion_stack.back().idx); + const re_syntax_base* saved_state = pstate = recursion_stack.back().preturn_address; + *m_presult = recursion_stack.back().results; + recursion_stack.pop_back(); + if(!match_all_states()) + { + recursion_stack.push_back(recursion_info()); + recursion_stack.back().preturn_address = saved_state; + recursion_stack.back().results = *m_presult; + return false; + } + return true; + } + if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) + return false; + if((m_match_flags & match_all) && (position != last)) + return false; + if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) + return false; + m_presult->set_second(position); + pstate = 0; + m_has_found_match = true; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + if((m_match_flags & match_any) == 0) + return false; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(match_extra & m_match_flags) + { + for(unsigned i = 0; i < m_presult->size(); ++i) + if((*m_presult)[i].matched) + ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); + } +#endif + return true; +} + + + +} // namespace re_detail +} // namespace cutl_details_boost +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/cutl/details/boost/regex/v4/primary_transform.hpp b/cutl/details/boost/regex/v4/primary_transform.hpp new file mode 100644 index 0000000..ae59e5b --- /dev/null +++ b/cutl/details/boost/regex/v4/primary_transform.hpp @@ -0,0 +1,146 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE: primary_transform.hpp + * VERSION: see + * DESCRIPTION: Heuristically determines the sort string format in use + * by the current locale. + */ + +#ifndef BOOST_REGEX_PRIMARY_TRANSFORM +#define BOOST_REGEX_PRIMARY_TRANSFORM + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ + namespace re_detail{ + + +enum{ + sort_C, + sort_fixed, + sort_delim, + sort_unknown +}; + +template +unsigned count_chars(const S& s, charT c) +{ + // + // Count how many occurances of character c occur + // in string s: if c is a delimeter between collation + // fields, then this should be the same value for all + // sort keys: + // + unsigned int count = 0; + for(unsigned pos = 0; pos < s.size(); ++pos) + { + if(s[pos] == c) ++count; + } + return count; +} + + +template +unsigned find_sort_syntax(const traits* pt, charT* delim) +{ + // + // compare 'a' with 'A' to see how similar they are, + // should really use a-accute but we can't portably do that, + // + typedef typename traits::string_type string_type; + typedef typename traits::char_type char_type; + + // Suppress incorrect warning for MSVC + (void)pt; + + char_type a[2] = {'a', '\0', }; + string_type sa(pt->transform(a, a+1)); + if(sa == a) + { + *delim = 0; + return sort_C; + } + char_type A[2] = { 'A', '\0', }; + string_type sA(pt->transform(A, A+1)); + char_type c[2] = { ';', '\0', }; + string_type sc(pt->transform(c, c+1)); + + int pos = 0; + while((pos <= static_cast(sa.size())) && (pos <= static_cast(sA.size())) && (sa[pos] == sA[pos])) ++pos; + --pos; + if(pos < 0) + { + *delim = 0; + return sort_unknown; + } + // + // at this point sa[pos] is either the end of a fixed width field + // or the character that acts as a delimiter: + // + charT maybe_delim = sa[pos]; + if((pos != 0) && (count_chars(sa, maybe_delim) == count_chars(sA, maybe_delim)) && (count_chars(sa, maybe_delim) == count_chars(sc, maybe_delim))) + { + *delim = maybe_delim; + return sort_delim; + } + // + // OK doen't look like a delimiter, try for fixed width field: + // + if((sa.size() == sA.size()) && (sa.size() == sc.size())) + { + // note assumes that the fixed width field is less than + // (numeric_limits::max)(), should be true for all types + // I can't imagine 127 character fields... + *delim = static_cast(++pos); + return sort_fixed; + } + // + // don't know what it is: + // + *delim = 0; + return sort_unknown; +} + + + } // namespace re_detail +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + + + + + + + diff --git a/cutl/details/boost/regex/v4/protected_call.hpp b/cutl/details/boost/regex/v4/protected_call.hpp new file mode 100644 index 0000000..fbf2a31 --- /dev/null +++ b/cutl/details/boost/regex/v4/protected_call.hpp @@ -0,0 +1,81 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE basic_regex_creator.cpp + * VERSION see + * DESCRIPTION: Declares template class basic_regex_creator which fills in + * the data members of a regex_data object. + */ + +#ifndef BOOST_REGEX_V4_PROTECTED_CALL_HPP +#define BOOST_REGEX_V4_PROTECTED_CALL_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +class BOOST_REGEX_DECL abstract_protected_call +{ +public: + bool BOOST_REGEX_CALL execute()const; + // this stops gcc-4 from complaining: + virtual ~abstract_protected_call(){} +private: + virtual bool call()const = 0; +}; + +template +class concrete_protected_call + : public abstract_protected_call +{ +public: + typedef bool (T::*proc_type)(); + concrete_protected_call(T* o, proc_type p) + : obj(o), proc(p) {} +private: + virtual bool call()const; + T* obj; + proc_type proc; +}; + +template +bool concrete_protected_call::call()const +{ + return (obj->*proc)(); +} + +} +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif diff --git a/cutl/details/boost/regex/v4/regbase.hpp b/cutl/details/boost/regex/v4/regbase.hpp new file mode 100644 index 0000000..0c6f7ae --- /dev/null +++ b/cutl/details/boost/regex/v4/regbase.hpp @@ -0,0 +1,180 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regbase.cpp + * VERSION see + * DESCRIPTION: Declares class regbase. + */ + +#ifndef BOOST_REGEX_V4_REGBASE_HPP +#define BOOST_REGEX_V4_REGBASE_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ +// +// class regbase +// handles error codes and flags +// +class BOOST_REGEX_DECL regbase +{ +public: + enum flag_type_ + { + // + // Divide the flags up into logical groups: + // bits 0-7 indicate main synatx type. + // bits 8-15 indicate syntax subtype. + // bits 16-31 indicate options that are common to all + // regex syntaxes. + // In all cases the default is 0. + // + // Main synatx group: + // + perl_syntax_group = 0, // default + basic_syntax_group = 1, // POSIX basic + literal = 2, // all characters are literals + main_option_type = literal | basic_syntax_group | perl_syntax_group, // everything! + // + // options specific to perl group: + // + no_bk_refs = 1 << 8, // \d not allowed + no_perl_ex = 1 << 9, // disable perl extensions + no_mod_m = 1 << 10, // disable Perl m modifier + mod_x = 1 << 11, // Perl x modifier + mod_s = 1 << 12, // force s modifier on (overrides match_not_dot_newline) + no_mod_s = 1 << 13, // force s modifier off (overrides match_not_dot_newline) + + // + // options specific to basic group: + // + no_char_classes = 1 << 8, // [[:CLASS:]] not allowed + no_intervals = 1 << 9, // {x,y} not allowed + bk_plus_qm = 1 << 10, // uses \+ and \? + bk_vbar = 1 << 11, // use \| for alternatives + emacs_ex = 1 << 12, // enables emacs extensions + + // + // options common to all groups: + // + no_escape_in_lists = 1 << 16, // '\' not special inside [...] + newline_alt = 1 << 17, // \n is the same as | + no_except = 1 << 18, // no exception on error + failbit = 1 << 19, // error flag + icase = 1 << 20, // characters are matched regardless of case + nocollate = 0, // don't use locale specific collation (deprecated) + collate = 1 << 21, // use locale specific collation + nosubs = 1 << 22, // don't mark sub-expressions + save_subexpression_location = 1 << 23, // save subexpression locations + no_empty_expressions = 1 << 24, // no empty expressions allowed + optimize = 0, // not really supported + + + + basic = basic_syntax_group | collate | no_escape_in_lists, + extended = no_bk_refs | collate | no_perl_ex | no_escape_in_lists, + normal = 0, + emacs = basic_syntax_group | collate | emacs_ex | bk_vbar, + awk = no_bk_refs | collate | no_perl_ex, + grep = basic | newline_alt, + egrep = extended | newline_alt, + sed = basic, + perl = normal, + ECMAScript = normal, + JavaScript = normal, + JScript = normal + }; + typedef unsigned int flag_type; + + enum restart_info + { + restart_any = 0, + restart_word = 1, + restart_line = 2, + restart_buf = 3, + restart_continue = 4, + restart_lit = 5, + restart_fixed_lit = 6, + restart_count = 7 + }; +}; + +// +// provide std lib proposal compatible constants: +// +namespace regex_constants{ + + enum flag_type_ + { + + no_except = ::cutl_details_boost::regbase::no_except, + failbit = ::cutl_details_boost::regbase::failbit, + literal = ::cutl_details_boost::regbase::literal, + icase = ::cutl_details_boost::regbase::icase, + nocollate = ::cutl_details_boost::regbase::nocollate, + collate = ::cutl_details_boost::regbase::collate, + nosubs = ::cutl_details_boost::regbase::nosubs, + optimize = ::cutl_details_boost::regbase::optimize, + bk_plus_qm = ::cutl_details_boost::regbase::bk_plus_qm, + bk_vbar = ::cutl_details_boost::regbase::bk_vbar, + no_intervals = ::cutl_details_boost::regbase::no_intervals, + no_char_classes = ::cutl_details_boost::regbase::no_char_classes, + no_escape_in_lists = ::cutl_details_boost::regbase::no_escape_in_lists, + no_mod_m = ::cutl_details_boost::regbase::no_mod_m, + mod_x = ::cutl_details_boost::regbase::mod_x, + mod_s = ::cutl_details_boost::regbase::mod_s, + no_mod_s = ::cutl_details_boost::regbase::no_mod_s, + save_subexpression_location = ::cutl_details_boost::regbase::save_subexpression_location, + no_empty_expressions = ::cutl_details_boost::regbase::no_empty_expressions, + + basic = ::cutl_details_boost::regbase::basic, + extended = ::cutl_details_boost::regbase::extended, + normal = ::cutl_details_boost::regbase::normal, + emacs = ::cutl_details_boost::regbase::emacs, + awk = ::cutl_details_boost::regbase::awk, + grep = ::cutl_details_boost::regbase::grep, + egrep = ::cutl_details_boost::regbase::egrep, + sed = basic, + perl = normal, + ECMAScript = normal, + JavaScript = normal, + JScript = normal + }; + typedef ::cutl_details_boost::regbase::flag_type syntax_option_type; + +} // namespace regex_constants + +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/cutl/details/boost/regex/v4/regex.hpp b/cutl/details/boost/regex/v4/regex.hpp new file mode 100644 index 0000000..a442970 --- /dev/null +++ b/cutl/details/boost/regex/v4/regex.hpp @@ -0,0 +1,202 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex.cpp + * VERSION see + * DESCRIPTION: Declares cutl_details_boost::basic_regex<> and associated + * functions and classes. This header is the main + * entry point for the template regex code. + */ + +#ifndef BOOST_RE_REGEX_HPP_INCLUDED +#define BOOST_RE_REGEX_HPP_INCLUDED + +#ifdef __cplusplus + +// what follows is all C++ don't include in C builds!! + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif +#ifndef BOOST_REGEX_WORKAROUND_HPP +#include +#endif + +#ifndef BOOST_REGEX_FWD_HPP +#include +#endif +#ifndef BOOST_REGEX_TRAITS_HPP +#include +#endif +#ifndef BOOST_REGEX_RAW_BUFFER_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_MATCH_FLAGS +#include +#endif +#ifndef BOOST_REGEX_RAW_BUFFER_HPP +#include +#endif +#ifndef BOOST_RE_PAT_EXCEPT_HPP +#include +#endif + +#ifndef BOOST_REGEX_V4_CHAR_REGEX_TRAITS_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_STATES_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_REGBASE_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_ITERATOR_TRAITS_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_BASIC_REGEX_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_BASIC_REGEX_CREATOR_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_BASIC_REGEX_PARSER_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_SUB_MATCH_HPP +#include +#endif +#ifndef BOOST_REGEX_FORMAT_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_MATCH_RESULTS_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_PROTECTED_CALL_HPP +#include +#endif +#ifndef BOOST_REGEX_MATCHER_HPP +#include +#endif +// +// template instances: +// +#define BOOST_REGEX_CHAR_T char +#ifdef BOOST_REGEX_NARROW_INSTANTIATE +# define BOOST_REGEX_INSTANTIATE +#endif +#include +#undef BOOST_REGEX_CHAR_T +#ifdef BOOST_REGEX_INSTANTIATE +# undef BOOST_REGEX_INSTANTIATE +#endif + +#ifndef BOOST_NO_WREGEX +#define BOOST_REGEX_CHAR_T wchar_t +#ifdef BOOST_REGEX_WIDE_INSTANTIATE +# define BOOST_REGEX_INSTANTIATE +#endif +#include +#undef BOOST_REGEX_CHAR_T +#ifdef BOOST_REGEX_INSTANTIATE +# undef BOOST_REGEX_INSTANTIATE +#endif +#endif + +#if !defined(BOOST_NO_WREGEX) && defined(BOOST_REGEX_HAS_OTHER_WCHAR_T) +#define BOOST_REGEX_CHAR_T unsigned short +#ifdef BOOST_REGEX_US_INSTANTIATE +# define BOOST_REGEX_INSTANTIATE +#endif +#include +#undef BOOST_REGEX_CHAR_T +#ifdef BOOST_REGEX_INSTANTIATE +# undef BOOST_REGEX_INSTANTIATE +#endif +#endif + + +namespace cutl_details_boost{ +#ifdef BOOST_REGEX_NO_FWD +typedef basic_regex > regex; +#ifndef BOOST_NO_WREGEX +typedef basic_regex > wregex; +#endif +#endif + +typedef match_results cmatch; +typedef match_results smatch; +#ifndef BOOST_NO_WREGEX +typedef match_results wcmatch; +typedef match_results wsmatch; +#endif + +} // namespace cutl_details_boost +#ifndef BOOST_REGEX_MATCH_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_REGEX_SEARCH_HPP +#include +#endif +#ifndef BOOST_REGEX_ITERATOR_HPP +#include +#endif +#ifndef BOOST_REGEX_TOKEN_ITERATOR_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_REGEX_GREP_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_REGEX_REPLACE_HPP +#include +#endif +#ifndef BOOST_REGEX_V4_REGEX_MERGE_HPP +#include +#endif +#ifndef BOOST_REGEX_SPLIT_HPP +#include +#endif + +#endif // __cplusplus + +#endif // include + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/cutl/details/boost/regex/v4/regex_format.hpp b/cutl/details/boost/regex/v4/regex_format.hpp new file mode 100644 index 0000000..211859f --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_format.hpp @@ -0,0 +1,1141 @@ +/* + * + * Copyright (c) 1998-2009 John Maddock + * Copyright 2008 Eric Niebler. + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_FORMAT_HPP +#define BOOST_REGEX_FORMAT_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef BOOST_NO_SFINAE +#include +#endif +#include + +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +// +// Forward declaration: +// + template >::allocator_type > +class match_results; + +namespace re_detail{ + +// +// struct trivial_format_traits: +// defines minimum localisation support for formatting +// in the case that the actual regex traits is unavailable. +// +template +struct trivial_format_traits +{ + typedef charT char_type; + + static std::ptrdiff_t length(const charT* p) + { + return global_length(p); + } + static charT tolower(charT c) + { + return ::cutl_details_boost::re_detail::global_lower(c); + } + static charT toupper(charT c) + { + return ::cutl_details_boost::re_detail::global_upper(c); + } + static int value(const charT c, int radix) + { + int result = global_value(c); + return result >= radix ? -1 : result; + } + int toi(const charT*& p1, const charT* p2, int radix)const + { + return global_toi(p1, p2, radix, *this); + } +}; + +template +class basic_regex_formatter +{ +public: + typedef typename traits::char_type char_type; + basic_regex_formatter(OutputIterator o, const Results& r, const traits& t) + : m_traits(t), m_results(r), m_out(o), m_state(output_copy), m_restore_state(output_copy), m_have_conditional(false) {} + OutputIterator format(ForwardIter p1, ForwardIter p2, match_flag_type f); + OutputIterator format(ForwardIter p1, match_flag_type f) + { + return format(p1, p1 + m_traits.length(p1), f); + } +private: + typedef typename Results::value_type sub_match_type; + enum output_state + { + output_copy, + output_next_lower, + output_next_upper, + output_lower, + output_upper, + output_none + }; + + void put(char_type c); + void put(const sub_match_type& sub); + void format_all(); + void format_perl(); + void format_escape(); + void format_conditional(); + void format_until_scope_end(); + bool handle_perl_verb(bool have_brace); + + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const mpl::false_&) + { + std::vector v(i, j); + return (i != j) ? this->m_results.named_subexpression(&v[0], &v[0] + v.size()) + : this->m_results.named_subexpression(static_cast(0), static_cast(0)); + } + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j, const mpl::true_&) + { + return this->m_results.named_subexpression(i, j); + } + inline typename Results::value_type const& get_named_sub(ForwardIter i, ForwardIter j) + { + typedef typename cutl_details_boost::is_convertible::type tag_type; + return get_named_sub(i, j, tag_type()); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j, const mpl::false_&) + { + std::vector v(i, j); + return (i != j) ? this->m_results.named_subexpression_index(&v[0], &v[0] + v.size()) + : this->m_results.named_subexpression_index(static_cast(0), static_cast(0)); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j, const mpl::true_&) + { + return this->m_results.named_subexpression_index(i, j); + } + inline int get_named_sub_index(ForwardIter i, ForwardIter j) + { + typedef typename cutl_details_boost::is_convertible::type tag_type; + return get_named_sub_index(i, j, tag_type()); + } +#ifdef BOOST_MSVC + // msvc-8.0 issues a spurious warning on the call to std::advance here: +#pragma warning(push) +#pragma warning(disable:4244) +#endif + inline int toi(ForwardIter& i, ForwardIter j, int base, const cutl_details_boost::mpl::false_&) + { + if(i != j) + { + std::vector v(i, j); + const char_type* start = &v[0]; + const char_type* pos = start; + int r = m_traits.toi(pos, &v[0] + v.size(), base); + std::advance(i, pos - start); + return r; + } + return -1; + } +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + inline int toi(ForwardIter& i, ForwardIter j, int base, const cutl_details_boost::mpl::true_&) + { + return m_traits.toi(i, j, base); + } + inline int toi(ForwardIter& i, ForwardIter j, int base) + { + typedef typename cutl_details_boost::is_convertible::type tag_type; + return toi(i, j, base, tag_type()); + } + + const traits& m_traits; // the traits class for localised formatting operations + const Results& m_results; // the match_results being used. + OutputIterator m_out; // where to send output. + ForwardIter m_position; // format string, current position + ForwardIter m_end; // format string end + match_flag_type m_flags; // format flags to use + output_state m_state; // what to do with the next character + output_state m_restore_state; // what state to restore to. + bool m_have_conditional; // we are parsing a conditional +private: + basic_regex_formatter(const basic_regex_formatter&); + basic_regex_formatter& operator=(const basic_regex_formatter&); +}; + +template +OutputIterator basic_regex_formatter::format(ForwardIter p1, ForwardIter p2, match_flag_type f) +{ + m_position = p1; + m_end = p2; + m_flags = f; + format_all(); + return m_out; +} + +template +void basic_regex_formatter::format_all() +{ + // over and over: + while(m_position != m_end) + { + switch(*m_position) + { + case '&': + if(m_flags & ::cutl_details_boost::regex_constants::format_sed) + { + ++m_position; + put(m_results[0]); + break; + } + put(*m_position++); + break; + case '\\': + format_escape(); + break; + case '(': + if(m_flags & cutl_details_boost::regex_constants::format_all) + { + ++m_position; + bool have_conditional = m_have_conditional; + m_have_conditional = false; + format_until_scope_end(); + m_have_conditional = have_conditional; + if(m_position == m_end) + return; + BOOST_ASSERT(*m_position == static_cast(')')); + ++m_position; // skip the closing ')' + break; + } + put(*m_position); + ++m_position; + break; + case ')': + if(m_flags & cutl_details_boost::regex_constants::format_all) + { + return; + } + put(*m_position); + ++m_position; + break; + case ':': + if((m_flags & cutl_details_boost::regex_constants::format_all) && m_have_conditional) + { + return; + } + put(*m_position); + ++m_position; + break; + case '?': + if(m_flags & cutl_details_boost::regex_constants::format_all) + { + ++m_position; + format_conditional(); + break; + } + put(*m_position); + ++m_position; + break; + case '$': + if((m_flags & format_sed) == 0) + { + format_perl(); + break; + } + // fall through, not a special character: + default: + put(*m_position); + ++m_position; + break; + } + } +} + +template +void basic_regex_formatter::format_perl() +{ + // + // On entry *m_position points to a '$' character + // output the information that goes with it: + // + BOOST_ASSERT(*m_position == '$'); + // + // see if this is a trailing '$': + // + if(++m_position == m_end) + { + --m_position; + put(*m_position); + ++m_position; + return; + } + // + // OK find out what kind it is: + // + bool have_brace = false; + ForwardIter save_position = m_position; + switch(*m_position) + { + case '&': + ++m_position; + put(this->m_results[0]); + break; + case '`': + ++m_position; + put(this->m_results.prefix()); + break; + case '\'': + ++m_position; + put(this->m_results.suffix()); + break; + case '$': + put(*m_position++); + break; + case '+': + if((++m_position != m_end) && (*m_position == '{')) + { + ForwardIter base = ++m_position; + while((m_position != m_end) && (*m_position != '}')) ++m_position; + if(m_position != m_end) + { + // Named sub-expression: + put(get_named_sub(base, m_position)); + ++m_position; + break; + } + else + { + m_position = --base; + } + } + put((this->m_results)[this->m_results.size() > 1 ? static_cast(this->m_results.size() - 1) : 1]); + break; + case '{': + have_brace = true; + ++m_position; + // fall through.... + default: + // see if we have a number: + { + std::ptrdiff_t len = ::cutl_details_boost::re_detail::distance(m_position, m_end); + //len = (std::min)(static_cast(2), len); + int v = this->toi(m_position, m_position + len, 10); + if((v < 0) || (have_brace && ((m_position == m_end) || (*m_position != '}')))) + { + // Look for a Perl-5.10 verb: + if(!handle_perl_verb(have_brace)) + { + // leave the $ as is, and carry on: + m_position = --save_position; + put(*m_position); + ++m_position; + } + break; + } + // otherwise output sub v: + put(this->m_results[v]); + if(have_brace) + ++m_position; + } + } +} + +template +bool basic_regex_formatter::handle_perl_verb(bool have_brace) +{ + // + // We may have a capitalised string containing a Perl action: + // + static const char_type MATCH[] = { 'M', 'A', 'T', 'C', 'H' }; + static const char_type PREMATCH[] = { 'P', 'R', 'E', 'M', 'A', 'T', 'C', 'H' }; + static const char_type POSTMATCH[] = { 'P', 'O', 'S', 'T', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_PAREN_MATCH[] = { 'L', 'A', 'S', 'T', '_', 'P', 'A', 'R', 'E', 'N', '_', 'M', 'A', 'T', 'C', 'H' }; + static const char_type LAST_SUBMATCH_RESULT[] = { 'L', 'A', 'S', 'T', '_', 'S', 'U', 'B', 'M', 'A', 'T', 'C', 'H', '_', 'R', 'E', 'S', 'U', 'L', 'T' }; + static const char_type LAST_SUBMATCH_RESULT_ALT[] = { '^', 'N' }; + + if(m_position == m_end) + return false; + if(have_brace && (*m_position == '^')) + ++m_position; + + std::ptrdiff_t max_len = m_end - m_position; + + if((max_len >= 5) && std::equal(m_position, m_position + 5, MATCH)) + { + m_position += 5; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 5; + return false; + } + } + put(this->m_results[0]); + return true; + } + if((max_len >= 8) && std::equal(m_position, m_position + 8, PREMATCH)) + { + m_position += 8; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 8; + return false; + } + } + put(this->m_results.prefix()); + return true; + } + if((max_len >= 9) && std::equal(m_position, m_position + 9, POSTMATCH)) + { + m_position += 9; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 9; + return false; + } + } + put(this->m_results.suffix()); + return true; + } + if((max_len >= 16) && std::equal(m_position, m_position + 16, LAST_PAREN_MATCH)) + { + m_position += 16; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 16; + return false; + } + } + put((this->m_results)[this->m_results.size() > 1 ? static_cast(this->m_results.size() - 1) : 1]); + return true; + } + if((max_len >= 20) && std::equal(m_position, m_position + 20, LAST_SUBMATCH_RESULT)) + { + m_position += 20; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 20; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + if((max_len >= 2) && std::equal(m_position, m_position + 2, LAST_SUBMATCH_RESULT_ALT)) + { + m_position += 2; + if(have_brace) + { + if((m_position != m_end) && (*m_position == '}')) + ++m_position; + else + { + m_position -= 2; + return false; + } + } + put(this->m_results.get_last_closed_paren()); + return true; + } + return false; +} + +template +void basic_regex_formatter::format_escape() +{ + // skip the escape and check for trailing escape: + if(++m_position == m_end) + { + put(static_cast('\\')); + return; + } + // now switch on the escape type: + switch(*m_position) + { + case 'a': + put(static_cast('\a')); + ++m_position; + break; + case 'f': + put(static_cast('\f')); + ++m_position; + break; + case 'n': + put(static_cast('\n')); + ++m_position; + break; + case 'r': + put(static_cast('\r')); + ++m_position; + break; + case 't': + put(static_cast('\t')); + ++m_position; + break; + case 'v': + put(static_cast('\v')); + ++m_position; + break; + case 'x': + if(++m_position == m_end) + { + put(static_cast('x')); + return; + } + // maybe have \x{ddd} + if(*m_position == static_cast('{')) + { + ++m_position; + int val = this->toi(m_position, m_end, 16); + if(val < 0) + { + // invalid value treat everything as literals: + put(static_cast('x')); + put(static_cast('{')); + return; + } + if((m_position == m_end) || (*m_position != static_cast('}'))) + { + --m_position; + while(*m_position != static_cast('\\')) + --m_position; + ++m_position; + put(*m_position++); + return; + } + ++m_position; + put(static_cast(val)); + return; + } + else + { + std::ptrdiff_t len = ::cutl_details_boost::re_detail::distance(m_position, m_end); + len = (std::min)(static_cast(2), len); + int val = this->toi(m_position, m_position + len, 16); + if(val < 0) + { + --m_position; + put(*m_position++); + return; + } + put(static_cast(val)); + } + break; + case 'c': + if(++m_position == m_end) + { + --m_position; + put(*m_position++); + return; + } + put(static_cast(*m_position++ % 32)); + break; + case 'e': + put(static_cast(27)); + ++m_position; + break; + default: + // see if we have a perl specific escape: + if((m_flags & cutl_details_boost::regex_constants::format_sed) == 0) + { + bool breakout = false; + switch(*m_position) + { + case 'l': + ++m_position; + m_restore_state = m_state; + m_state = output_next_lower; + breakout = true; + break; + case 'L': + ++m_position; + m_state = output_lower; + breakout = true; + break; + case 'u': + ++m_position; + m_restore_state = m_state; + m_state = output_next_upper; + breakout = true; + break; + case 'U': + ++m_position; + m_state = output_upper; + breakout = true; + break; + case 'E': + ++m_position; + m_state = output_copy; + breakout = true; + break; + } + if(breakout) + break; + } + // see if we have a \n sed style backreference: + std::ptrdiff_t len = ::cutl_details_boost::re_detail::distance(m_position, m_end); + len = (std::min)(static_cast(1), len); + int v = this->toi(m_position, m_position+len, 10); + if((v > 0) || ((v == 0) && (m_flags & ::cutl_details_boost::regex_constants::format_sed))) + { + put(m_results[v]); + break; + } + else if(v == 0) + { + // octal ecape sequence: + --m_position; + len = ::cutl_details_boost::re_detail::distance(m_position, m_end); + len = (std::min)(static_cast(4), len); + v = this->toi(m_position, m_position + len, 8); + BOOST_ASSERT(v >= 0); + put(static_cast(v)); + break; + } + // Otherwise output the character "as is": + put(*m_position++); + break; + } +} + +template +void basic_regex_formatter::format_conditional() +{ + if(m_position == m_end) + { + // oops trailing '?': + put(static_cast('?')); + return; + } + int v; + if(*m_position == '{') + { + ForwardIter base = m_position; + ++m_position; + v = this->toi(m_position, m_end, 10); + if(v < 0) + { + // Try a named subexpression: + while((m_position != m_end) && (*m_position != '}')) + ++m_position; + v = this->get_named_sub_index(base + 1, m_position); + } + if((v < 0) || (*m_position != '}')) + { + m_position = base; + // oops trailing '?': + put(static_cast('?')); + return; + } + // Skip trailing '}': + ++m_position; + } + else + { + std::ptrdiff_t len = ::cutl_details_boost::re_detail::distance(m_position, m_end); + len = (std::min)(static_cast(2), len); + v = this->toi(m_position, m_position + len, 10); + } + if(v < 0) + { + // oops not a number: + put(static_cast('?')); + return; + } + + // output varies depending upon whether sub-expression v matched or not: + if(m_results[v].matched) + { + m_have_conditional = true; + format_all(); + m_have_conditional = false; + if((m_position != m_end) && (*m_position == static_cast(':'))) + { + // skip the ':': + ++m_position; + // save output state, then turn it off: + output_state saved_state = m_state; + m_state = output_none; + // format the rest of this scope: + format_until_scope_end(); + // restore output state: + m_state = saved_state; + } + } + else + { + // save output state, then turn it off: + output_state saved_state = m_state; + m_state = output_none; + // format until ':' or ')': + m_have_conditional = true; + format_all(); + m_have_conditional = false; + // restore state: + m_state = saved_state; + if((m_position != m_end) && (*m_position == static_cast(':'))) + { + // skip the ':': + ++m_position; + // format the rest of this scope: + format_until_scope_end(); + } + } +} + +template +void basic_regex_formatter::format_until_scope_end() +{ + do + { + format_all(); + if((m_position == m_end) || (*m_position == static_cast(')'))) + return; + put(*m_position++); + }while(m_position != m_end); +} + +template +void basic_regex_formatter::put(char_type c) +{ + // write a single character to output + // according to which case translation mode we are in: + switch(this->m_state) + { + case output_none: + return; + case output_next_lower: + c = m_traits.tolower(c); + this->m_state = m_restore_state; + break; + case output_next_upper: + c = m_traits.toupper(c); + this->m_state = m_restore_state; + break; + case output_lower: + c = m_traits.tolower(c); + break; + case output_upper: + c = m_traits.toupper(c); + break; + default: + break; + } + *m_out = c; + ++m_out; +} + +template +void basic_regex_formatter::put(const sub_match_type& sub) +{ + typedef typename sub_match_type::iterator iterator_type; + iterator_type i = sub.first; + while(i != sub.second) + { + put(*i); + ++i; + } +} + +template +class string_out_iterator +#ifndef BOOST_NO_STD_ITERATOR + : public std::iterator +#endif +{ + S* out; +public: + string_out_iterator(S& s) : out(&s) {} + string_out_iterator& operator++() { return *this; } + string_out_iterator& operator++(int) { return *this; } + string_out_iterator& operator*() { return *this; } + string_out_iterator& operator=(typename S::value_type v) + { + out->append(1, v); + return *this; + } + +#ifdef BOOST_NO_STD_ITERATOR + typedef std::ptrdiff_t difference_type; + typedef typename S::value_type value_type; + typedef value_type* pointer; + typedef value_type& reference; + typedef std::output_iterator_tag iterator_category; +#endif +}; + +template +OutputIterator regex_format_imp(OutputIterator out, + const match_results& m, + ForwardIter p1, ForwardIter p2, + match_flag_type flags, + const traits& t + ) +{ + if(flags & regex_constants::format_literal) + { + return re_detail::copy(p1, p2, out); + } + + re_detail::basic_regex_formatter< + OutputIterator, + match_results, + traits, ForwardIter> f(out, m, t); + return f.format(p1, p2, flags); +} + +#ifndef BOOST_NO_SFINAE + +BOOST_MPL_HAS_XXX_TRAIT_DEF(const_iterator) + +struct any_type { any_type(...); }; +typedef char no_type; +typedef char (&unary_type)[2]; +typedef char (&binary_type)[3]; +typedef char (&ternary_type)[4]; + +no_type check_is_formatter(unary_type, binary_type, ternary_type); +template +unary_type check_is_formatter(T const &, binary_type, ternary_type); +template +binary_type check_is_formatter(unary_type, T const &, ternary_type); +template +binary_type check_is_formatter(T const &, U const &, ternary_type); +template +ternary_type check_is_formatter(unary_type, binary_type, T const &); +template +ternary_type check_is_formatter(T const &, binary_type, U const &); +template +ternary_type check_is_formatter(unary_type, T const &, U const &); +template +ternary_type check_is_formatter(T const &, U const &, V const &); + +struct unary_binary_ternary +{ + typedef unary_type (*unary_fun)(any_type); + typedef binary_type (*binary_fun)(any_type, any_type); + typedef ternary_type (*ternary_fun)(any_type, any_type, any_type); + operator unary_fun(); + operator binary_fun(); + operator ternary_fun(); +}; + +template::value> +struct formatter_wrapper + : Formatter + , unary_binary_ternary +{ + formatter_wrapper(){} +}; + +template +struct formatter_wrapper + : unary_binary_ternary +{ + operator Formatter *(); +}; + +template +struct formatter_wrapper + : unary_binary_ternary +{ + operator Formatter *(); +}; + +template +struct format_traits_imp +{ +private: + // + // F must be a pointer, a function, or a class with a function call operator: + // + BOOST_STATIC_ASSERT((::cutl_details_boost::is_pointer::value || ::cutl_details_boost::is_function::value || ::cutl_details_boost::is_class::value)); + static formatter_wrapper::type> f; + static M m; + static O out; + static cutl_details_boost::regex_constants::match_flag_type flags; +public: + BOOST_STATIC_CONSTANT(int, value = sizeof(check_is_formatter(f(m), f(m, out), f(m, out, flags)))); +}; + +template +struct format_traits +{ +public: + // + // Type is mpl::int_ where N is one of: + // + // 0 : F is a pointer to a presumably null-terminated string. + // 1 : F is a character-container such as a std::string. + // 2 : F is a Unary Functor. + // 3 : F is a Binary Functor. + // 4 : F is a Ternary Functor. + // + typedef typename cutl_details_boost::mpl::if_< + cutl_details_boost::mpl::and_, cutl_details_boost::mpl::not_::type> > >, + cutl_details_boost::mpl::int_<0>, + typename cutl_details_boost::mpl::if_< + has_const_iterator, + cutl_details_boost::mpl::int_<1>, + cutl_details_boost::mpl::int_::value> + >::type + >::type type; + // + // This static assertion will fail if the functor passed does not accept + // the same type of arguments passed. + // + BOOST_STATIC_ASSERT( cutl_details_boost::is_class::value && !has_const_iterator::value ? (type::value > 1) : true); +}; + +#else // BOOST_NO_SFINAE + +template +struct format_traits +{ +public: + // + // Type is mpl::int_ where N is one of: + // + // 0 : F is a pointer to a presumably null-terminated string. + // 1 : F is a character-container such as a std::string. + // + // Other options such as F being a Functor are not supported without + // SFINAE support. + // + typedef typename cutl_details_boost::mpl::if_< + cutl_details_boost::is_pointer, + cutl_details_boost::mpl::int_<0>, + cutl_details_boost::mpl::int_<1> + >::type type; +}; + +#endif // BOOST_NO_SFINAE + +template +struct format_functor3 +{ + format_functor3(Base b) : func(b) {} + template + OutputIter operator()(const Match& m, OutputIter i, cutl_details_boost::regex_constants::match_flag_type f) + { + return cutl_details_boost::unwrap_ref(func)(m, i, f); + } + template + OutputIter operator()(const Match& m, OutputIter i, cutl_details_boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor3(const format_functor3&); + format_functor3& operator=(const format_functor3&); +}; + +template +struct format_functor2 +{ + format_functor2(Base b) : func(b) {} + template + OutputIter operator()(const Match& m, OutputIter i, cutl_details_boost::regex_constants::match_flag_type /*f*/) + { + return cutl_details_boost::unwrap_ref(func)(m, i); + } + template + OutputIter operator()(const Match& m, OutputIter i, cutl_details_boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor2(const format_functor2&); + format_functor2& operator=(const format_functor2&); +}; + +template +struct format_functor1 +{ + format_functor1(Base b) : func(b) {} + + template + OutputIter do_format_string(const S& s, OutputIter i) + { + return re_detail::copy(s.begin(), s.end(), i); + } + template + inline OutputIter do_format_string(const S* s, OutputIter i) + { + while(s && *s) + { + *i = *s; + ++i; + ++s; + } + return i; + } + template + OutputIter operator()(const Match& m, OutputIter i, cutl_details_boost::regex_constants::match_flag_type /*f*/) + { + return do_format_string(cutl_details_boost::unwrap_ref(func)(m), i); + } + template + OutputIter operator()(const Match& m, OutputIter i, cutl_details_boost::regex_constants::match_flag_type f, const Traits&) + { + return (*this)(m, i, f); + } +private: + Base func; + format_functor1(const format_functor1&); + format_functor1& operator=(const format_functor1&); +}; + +template +struct format_functor_c_string +{ + format_functor_c_string(const charT* ps) : func(ps) {} + + template + OutputIter operator()(const Match& m, OutputIter i, cutl_details_boost::regex_constants::match_flag_type f, const Traits& t = Traits()) + { + typedef typename Match::char_type char_type; + const charT* end = func; + while(*end) ++end; + return regex_format_imp(i, m, func, end, f, t); + } +private: + const charT* func; + format_functor_c_string(const format_functor_c_string&); + format_functor_c_string& operator=(const format_functor_c_string&); +}; + +template +struct format_functor_container +{ + format_functor_container(const Container& c) : func(c) {} + + template + OutputIter operator()(const Match& m, OutputIter i, cutl_details_boost::regex_constants::match_flag_type f, const Traits& t = Traits()) + { + typedef typename Match::char_type char_type; + return re_detail::regex_format_imp(i, m, func.begin(), func.end(), f, t); + } +private: + const Container& func; + format_functor_container(const format_functor_container&); + format_functor_container& operator=(const format_functor_container&); +}; + +template > +struct compute_functor_type +{ + typedef typename format_traits::type tag; + typedef typename cutl_details_boost::remove_cv< typename cutl_details_boost::remove_pointer::type>::type maybe_char_type; + + typedef typename mpl::if_< + ::cutl_details_boost::is_same >, format_functor_c_string, + typename mpl::if_< + ::cutl_details_boost::is_same >, format_functor_container, + typename mpl::if_< + ::cutl_details_boost::is_same >, format_functor1, + typename mpl::if_< + ::cutl_details_boost::is_same >, format_functor2, + format_functor3 + >::type + >::type + >::type + >::type type; +}; + +} // namespace re_detail + +template +inline OutputIterator regex_format(OutputIterator out, + const match_results& m, + Functor fmt, + match_flag_type flags = format_all + ) +{ + return m.format(out, fmt, flags); +} + +template +inline std::basic_string::char_type> regex_format(const match_results& m, + Functor fmt, + match_flag_type flags = format_all) +{ + return m.format(fmt, flags); +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_FORMAT_HPP + + + + + + diff --git a/cutl/details/boost/regex/v4/regex_fwd.hpp b/cutl/details/boost/regex/v4/regex_fwd.hpp new file mode 100644 index 0000000..da2d66b --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_fwd.hpp @@ -0,0 +1,73 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_fwd.cpp + * VERSION see + * DESCRIPTION: Forward declares cutl_details_boost::basic_regex<> and + * associated typedefs. + */ + +#ifndef BOOST_REGEX_FWD_HPP_INCLUDED +#define BOOST_REGEX_FWD_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +// +// define BOOST_REGEX_NO_FWD if this +// header doesn't work! +// +#ifdef BOOST_REGEX_NO_FWD +# ifndef BOOST_RE_REGEX_HPP +# include +# endif +#else + +namespace cutl_details_boost{ + +template +class cpp_regex_traits; +template +struct c_regex_traits; +template +class w32_regex_traits; + +#ifdef BOOST_REGEX_USE_WIN32_LOCALE +template > +struct regex_traits; +#elif defined(BOOST_REGEX_USE_CPP_LOCALE) +template > +struct regex_traits; +#else +template > +struct regex_traits; +#endif + +template > +class basic_regex; + +typedef basic_regex > regex; +#ifndef BOOST_NO_WREGEX +typedef basic_regex > wregex; +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_NO_FWD + +#endif + + + + diff --git a/cutl/details/boost/regex/v4/regex_grep.hpp b/cutl/details/boost/regex/v4/regex_grep.hpp new file mode 100644 index 0000000..f3f3eec --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_grep.hpp @@ -0,0 +1,155 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_grep.hpp + * VERSION see + * DESCRIPTION: Provides regex_grep implementation. + */ + +#ifndef BOOST_REGEX_V4_REGEX_GREP_HPP +#define BOOST_REGEX_V4_REGEX_GREP_HPP + + +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +// +// regex_grep: +// find all non-overlapping matches within the sequence first last: +// +template +inline unsigned int regex_grep(Predicate foo, + BidiIterator first, + BidiIterator last, + const basic_regex& e, + match_flag_type flags = match_default) +{ + if(e.flags() & regex_constants::failbit) + return false; + + typedef typename match_results::allocator_type match_allocator_type; + + match_results m; + re_detail::perl_matcher matcher(first, last, m, e, flags, first); + unsigned int count = 0; + while(matcher.find()) + { + ++count; + if(0 == foo(m)) + return count; // caller doesn't want to go on + if(m[0].second == last) + return count; // we've reached the end, don't try and find an extra null match. + if(m.length() == 0) + { + if(m[0].second == last) + return count; + // we found a NULL-match, now try to find + // a non-NULL one at the same position: + match_results m2(m); + matcher.setf(match_not_null | match_continuous); + if(matcher.find()) + { + ++count; + if(0 == foo(m)) + return count; + } + else + { + // reset match back to where it was: + m = m2; + } + matcher.unsetf((match_not_null | match_continuous) & ~flags); + } + } + return count; +} + +// +// regex_grep convenience interfaces: +#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING +// +// this isn't really a partial specialisation, but template function +// overloading - if the compiler doesn't support partial specialisation +// then it really won't support this either: +template +inline unsigned int regex_grep(Predicate foo, const charT* str, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, str, str + traits::length(str), e, flags); +} + +template +inline unsigned int regex_grep(Predicate foo, const std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, s.begin(), s.end(), e, flags); +} +#else // partial specialisation +inline unsigned int regex_grep(bool (*foo)(const cmatch&), const char* str, + const regex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, str, str + regex::traits_type::length(str), e, flags); +} +#ifndef BOOST_NO_WREGEX +inline unsigned int regex_grep(bool (*foo)(const wcmatch&), const wchar_t* str, + const wregex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, str, str + wregex::traits_type::length(str), e, flags); +} +#endif +inline unsigned int regex_grep(bool (*foo)(const match_results&), const std::string& s, + const regex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, s.begin(), s.end(), e, flags); +} +#if !defined(BOOST_NO_WREGEX) +inline unsigned int regex_grep(bool (*foo)(const match_results::const_iterator>&), + const std::basic_string& s, + const wregex& e, + match_flag_type flags = match_default) +{ + return regex_grep(foo, s.begin(), s.end(), e, flags); +} +#endif +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_V4_REGEX_GREP_HPP + diff --git a/cutl/details/boost/regex/v4/regex_iterator.hpp b/cutl/details/boost/regex/v4/regex_iterator.hpp new file mode 100644 index 0000000..daf8d22 --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_iterator.hpp @@ -0,0 +1,201 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_iterator.hpp + * VERSION see + * DESCRIPTION: Provides regex_iterator implementation. + */ + +#ifndef BOOST_REGEX_V4_REGEX_ITERATOR_HPP +#define BOOST_REGEX_V4_REGEX_ITERATOR_HPP + +#include + +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +template +class regex_iterator_implementation +{ + typedef basic_regex regex_type; + + match_results what; // current match + BidirectionalIterator base; // start of sequence + BidirectionalIterator end; // end of sequence + const regex_type re; // the expression + match_flag_type flags; // flags for matching + +public: + regex_iterator_implementation(const regex_type* p, BidirectionalIterator last, match_flag_type f) + : base(), end(last), re(*p), flags(f){} + bool init(BidirectionalIterator first) + { + base = first; + return regex_search(first, end, what, re, flags); + } + bool compare(const regex_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) && (end == that.end) && (flags == that.flags) && (what[0].first == that.what[0].first) && (what[0].second == that.what[0].second); + } + const match_results& get() + { return what; } + bool next() + { + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail; + BidirectionalIterator next_start = what[0].second; + match_flag_type f(flags); + if(!what.length()) + f |= regex_constants::match_not_initial_null; + //if(base != next_start) + // f |= regex_constants::match_not_bob; + bool result = regex_search(next_start, end, what, re, f, base); + if(result) + what.set_base(base); + return result; + } +private: + regex_iterator_implementation& operator=(const regex_iterator_implementation&); +}; + +template ::value_type, + class traits = regex_traits > +class regex_iterator +#ifndef BOOST_NO_STD_ITERATOR + : public std::iterator< + std::forward_iterator_tag, + match_results, + typename re_detail::regex_iterator_traits::difference_type, + const match_results*, + const match_results& > +#endif +{ +private: + typedef regex_iterator_implementation impl; + typedef shared_ptr pimpl; +public: + typedef basic_regex regex_type; + typedef match_results value_type; + typedef typename re_detail::regex_iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + regex_iterator(){} + regex_iterator(BidirectionalIterator a, BidirectionalIterator b, + const regex_type& re, + match_flag_type m = match_default) + : pdata(new impl(&re, b, m)) + { + if(!pdata->init(a)) + { + pdata.reset(); + } + } + regex_iterator(const regex_iterator& that) + : pdata(that.pdata) {} + regex_iterator& operator=(const regex_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const regex_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const regex_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + regex_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + regex_iterator operator++(int) + { + regex_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && !pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef regex_iterator cregex_iterator; +typedef regex_iterator sregex_iterator; +#ifndef BOOST_NO_WREGEX +typedef regex_iterator wcregex_iterator; +typedef regex_iterator wsregex_iterator; +#endif + +// make_regex_iterator: +template +inline regex_iterator make_regex_iterator(const charT* p, const basic_regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_iterator(p, p+traits::length(p), e, m); +} +template +inline regex_iterator::const_iterator, charT, traits> make_regex_iterator(const std::basic_string& p, const basic_regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, m); +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_V4_REGEX_ITERATOR_HPP + diff --git a/cutl/details/boost/regex/v4/regex_match.hpp b/cutl/details/boost/regex/v4/regex_match.hpp new file mode 100644 index 0000000..045c4bc --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_match.hpp @@ -0,0 +1,382 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_match.hpp + * VERSION see + * DESCRIPTION: Regular expression matching algorithms. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + + +#ifndef BOOST_REGEX_MATCH_HPP +#define BOOST_REGEX_MATCH_HPP + +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +// +// proc regex_match +// returns true if the specified regular expression matches +// the whole of the input. Fills in what matched in m. +// +template +bool regex_match(BidiIterator first, BidiIterator last, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + re_detail::perl_matcher matcher(first, last, m, e, flags, first); + return matcher.match(); +} +template +bool regex_match(iterator first, iterator last, + const basic_regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(first, last, m, e, flags | regex_constants::match_any); +} +// +// query_match convenience interfaces: +#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING +// +// this isn't really a partial specialisation, but template function +// overloading - if the compiler doesn't support partial specialisation +// then it really won't support this either: +template +inline bool regex_match(const charT* str, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + traits::length(str), m, e, flags); +} + +template +inline bool regex_match(const std::basic_string& s, + match_results::const_iterator, Allocator>& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +template +inline bool regex_match(const charT* str, + const basic_regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + traits::length(str), m, e, flags | regex_constants::match_any); +} + +template +inline bool regex_match(const std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + typedef typename std::basic_string::const_iterator iterator; + match_results m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#else // partial ordering +inline bool regex_match(const char* str, + cmatch& m, + const regex& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + regex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const char* str, + const regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + regex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#ifndef BOOST_NO_STD_LOCALE +inline bool regex_match(const char* str, + cmatch& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + regex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const char* str, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + regex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#endif +inline bool regex_match(const char* str, + cmatch& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + regex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const char* str, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + regex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +inline bool regex_match(const char* str, + cmatch& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + regex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const char* str, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + regex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#endif +#ifndef BOOST_NO_WREGEX +inline bool regex_match(const wchar_t* str, + wcmatch& m, + const wregex& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const wchar_t* str, + const wregex& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#ifndef BOOST_NO_STD_LOCALE +inline bool regex_match(const wchar_t* str, + wcmatch& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const wchar_t* str, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#endif +inline bool regex_match(const wchar_t* str, + wcmatch& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const wchar_t* str, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +inline bool regex_match(const wchar_t* str, + wcmatch& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags); +} +inline bool regex_match(const wchar_t* str, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(str, str + wregex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#endif +#endif +inline bool regex_match(const std::string& s, + smatch& m, + const regex& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::string& s, + const regex& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#ifndef BOOST_NO_STD_LOCALE +inline bool regex_match(const std::string& s, + smatch& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::string& s, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#endif +inline bool regex_match(const std::string& s, + smatch& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::string& s, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +inline bool regex_match(const std::string& s, + smatch& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::string& s, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#endif +#if !defined(BOOST_NO_WREGEX) +inline bool regex_match(const std::basic_string& s, + match_results::const_iterator>& m, + const wregex& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::basic_string& s, + const wregex& e, + match_flag_type flags = match_default) +{ + match_results::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#ifndef BOOST_NO_STD_LOCALE +inline bool regex_match(const std::basic_string& s, + match_results::const_iterator>& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::basic_string& s, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#endif +inline bool regex_match(const std::basic_string& s, + match_results::const_iterator>& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::basic_string& s, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +inline bool regex_match(const std::basic_string& s, + match_results::const_iterator>& m, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + return regex_match(s.begin(), s.end(), m, e, flags); +} +inline bool regex_match(const std::basic_string& s, + const basic_regex >& e, + match_flag_type flags = match_default) +{ + match_results::const_iterator> m; + return regex_match(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#endif +#endif + +#endif + + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_MATCH_HPP + + + + + + + + + + + + + + + + + + diff --git a/cutl/details/boost/regex/v4/regex_merge.hpp b/cutl/details/boost/regex/v4/regex_merge.hpp new file mode 100644 index 0000000..80f00ac --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_merge.hpp @@ -0,0 +1,93 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_V4_REGEX_MERGE_HPP +#define BOOST_REGEX_V4_REGEX_MERGE_HPP + + +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +template +inline OutputIterator regex_merge(OutputIterator out, + Iterator first, + Iterator last, + const basic_regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + return regex_replace(out, first, last, e, fmt, flags); +} + +template +inline OutputIterator regex_merge(OutputIterator out, + Iterator first, + Iterator last, + const basic_regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + return regex_merge(out, first, last, e, fmt.c_str(), flags); +} + +template +inline std::basic_string regex_merge(const std::basic_string& s, + const basic_regex& e, + const charT* fmt, + match_flag_type flags = match_default) +{ + return regex_replace(s, e, fmt, flags); +} + +template +inline std::basic_string regex_merge(const std::basic_string& s, + const basic_regex& e, + const std::basic_string& fmt, + match_flag_type flags = match_default) +{ + return regex_replace(s, e, fmt, flags); +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_V4_REGEX_MERGE_HPP + + diff --git a/cutl/details/boost/regex/v4/regex_raw_buffer.hpp b/cutl/details/boost/regex/v4/regex_raw_buffer.hpp new file mode 100644 index 0000000..5aba83d --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_raw_buffer.hpp @@ -0,0 +1,210 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_raw_buffer.hpp + * VERSION see + * DESCRIPTION: Raw character buffer for regex code. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_RAW_BUFFER_HPP +#define BOOST_REGEX_RAW_BUFFER_HPP + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif + +#include +#include + +namespace cutl_details_boost{ + namespace re_detail{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +struct empty_padding{}; + +union padding +{ + void* p; + unsigned int i; +}; + +template +struct padding3 +{ + enum{ + padding_size = 8, + padding_mask = 7 + }; +}; + +template<> +struct padding3<2> +{ + enum{ + padding_size = 2, + padding_mask = 1 + }; +}; + +template<> +struct padding3<4> +{ + enum{ + padding_size = 4, + padding_mask = 3 + }; +}; + +template<> +struct padding3<8> +{ + enum{ + padding_size = 8, + padding_mask = 7 + }; +}; + +template<> +struct padding3<16> +{ + enum{ + padding_size = 16, + padding_mask = 15 + }; +}; + +enum{ + padding_size = padding3::padding_size, + padding_mask = padding3::padding_mask +}; + +// +// class raw_storage +// basically this is a simplified vector +// this is used by basic_regex for expression storage +// + +class BOOST_REGEX_DECL raw_storage +{ +public: + typedef std::size_t size_type; + typedef unsigned char* pointer; +private: + pointer last, start, end; +public: + + raw_storage(); + raw_storage(size_type n); + + ~raw_storage() + { + ::operator delete(start); + } + + void BOOST_REGEX_CALL resize(size_type n); + + void* BOOST_REGEX_CALL extend(size_type n) + { + if(size_type(last - end) < n) + resize(n + (end - start)); + register pointer result = end; + end += n; + return result; + } + + void* BOOST_REGEX_CALL insert(size_type pos, size_type n); + + size_type BOOST_REGEX_CALL size() + { + return end - start; + } + + size_type BOOST_REGEX_CALL capacity() + { + return last - start; + } + + void* BOOST_REGEX_CALL data()const + { + return start; + } + + size_type BOOST_REGEX_CALL index(void* ptr) + { + return static_cast(ptr) - static_cast(data()); + } + + void BOOST_REGEX_CALL clear() + { + end = start; + } + + void BOOST_REGEX_CALL align() + { + // move end up to a boundary: + end = start + (((end - start) + padding_mask) & ~padding_mask); + } + void swap(raw_storage& that) + { + std::swap(start, that.start); + std::swap(end, that.end); + std::swap(last, that.last); + } +}; + +inline raw_storage::raw_storage() +{ + last = start = end = 0; +} + +inline raw_storage::raw_storage(size_type n) +{ + start = end = static_cast(::operator new(n)); + BOOST_REGEX_NOEH_ASSERT(start) + last = start + n; +} + + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace re_detail +} // namespace cutl_details_boost + +#endif + + + + + + diff --git a/cutl/details/boost/regex/v4/regex_replace.hpp b/cutl/details/boost/regex/v4/regex_replace.hpp new file mode 100644 index 0000000..24867da --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_replace.hpp @@ -0,0 +1,99 @@ +/* + * + * Copyright (c) 1998-2009 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_format.hpp + * VERSION see + * DESCRIPTION: Provides formatting output routines for search and replace + * operations. Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_V4_REGEX_REPLACE_HPP +#define BOOST_REGEX_V4_REGEX_REPLACE_HPP + + +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +template +OutputIterator regex_replace(OutputIterator out, + BidirectionalIterator first, + BidirectionalIterator last, + const basic_regex& e, + Formatter fmt, + match_flag_type flags = match_default) +{ + regex_iterator i(first, last, e, flags); + regex_iterator j; + if(i == j) + { + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(first, last, out); + } + else + { + BidirectionalIterator last_m(first); + while(i != j) + { + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(i->prefix().first, i->prefix().second, out); + out = i->format(out, fmt, flags, e); + last_m = (*i)[0].second; + if(flags & regex_constants::format_first_only) + break; + ++i; + } + if(!(flags & regex_constants::format_no_copy)) + out = re_detail::copy(last_m, last, out); + } + return out; +} + +template +std::basic_string regex_replace(const std::basic_string& s, + const basic_regex& e, + Formatter fmt, + match_flag_type flags = match_default) +{ + std::basic_string result; + re_detail::string_out_iterator > i(result); + regex_replace(i, s.begin(), s.end(), e, fmt, flags); + return result; +} + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_V4_REGEX_REPLACE_HPP + + diff --git a/cutl/details/boost/regex/v4/regex_search.hpp b/cutl/details/boost/regex/v4/regex_search.hpp new file mode 100644 index 0000000..e6de6b6 --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_search.hpp @@ -0,0 +1,217 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_search.hpp + * VERSION see + * DESCRIPTION: Provides regex_search implementation. + */ + +#ifndef BOOST_REGEX_V4_REGEX_SEARCH_HPP +#define BOOST_REGEX_V4_REGEX_SEARCH_HPP + + +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +template +bool regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(first, last, m, e, flags, first); +} + +template +bool regex_search(BidiIterator first, BidiIterator last, + match_results& m, + const basic_regex& e, + match_flag_type flags, + BidiIterator base) +{ + if(e.flags() & regex_constants::failbit) + return false; + + re_detail::perl_matcher matcher(first, last, m, e, flags, base); + return matcher.find(); +} + +// +// regex_search convenience interfaces: +#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING +// +// this isn't really a partial specialisation, but template function +// overloading - if the compiler doesn't support partial specialisation +// then it really won't support this either: +template +inline bool regex_search(const charT* str, + match_results& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(str, str + traits::length(str), m, e, flags); +} + +template +inline bool regex_search(const std::basic_string& s, + match_results::const_iterator, Allocator>& m, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(s.begin(), s.end(), m, e, flags); +} +#else // partial overloads: +inline bool regex_search(const char* str, + cmatch& m, + const regex& e, + match_flag_type flags = match_default) +{ + return regex_search(str, str + regex::traits_type::length(str), m, e, flags); +} +inline bool regex_search(const char* first, const char* last, + const regex& e, + match_flag_type flags = match_default) +{ + cmatch m; + return regex_search(first, last, m, e, flags | regex_constants::match_any); +} + +#ifndef BOOST_NO_WREGEX +inline bool regex_search(const wchar_t* str, + wcmatch& m, + const wregex& e, + match_flag_type flags = match_default) +{ + return regex_search(str, str + wregex::traits_type::length(str), m, e, flags); +} +inline bool regex_search(const wchar_t* first, const wchar_t* last, + const wregex& e, + match_flag_type flags = match_default) +{ + wcmatch m; + return regex_search(first, last, m, e, flags | regex_constants::match_any); +} +#endif +inline bool regex_search(const std::string& s, + smatch& m, + const regex& e, + match_flag_type flags = match_default) +{ + return regex_search(s.begin(), s.end(), m, e, flags); +} +#if !defined(BOOST_NO_WREGEX) +inline bool regex_search(const std::basic_string& s, + wsmatch& m, + const wregex& e, + match_flag_type flags = match_default) +{ + return regex_search(s.begin(), s.end(), m, e, flags); +} +#endif + +#endif + +template +bool regex_search(BidiIterator first, BidiIterator last, + const basic_regex& e, + match_flag_type flags = match_default) +{ + if(e.flags() & regex_constants::failbit) + return false; + + match_results m; + typedef typename match_results::allocator_type match_alloc_type; + re_detail::perl_matcher matcher(first, last, m, e, flags | regex_constants::match_any, first); + return matcher.find(); +} + +#ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING + +template +inline bool regex_search(const charT* str, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(str, str + traits::length(str), e, flags); +} + +template +inline bool regex_search(const std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_search(s.begin(), s.end(), e, flags); +} +#else // non-template function overloads +inline bool regex_search(const char* str, + const regex& e, + match_flag_type flags = match_default) +{ + cmatch m; + return regex_search(str, str + regex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#ifndef BOOST_NO_WREGEX +inline bool regex_search(const wchar_t* str, + const wregex& e, + match_flag_type flags = match_default) +{ + wcmatch m; + return regex_search(str, str + wregex::traits_type::length(str), m, e, flags | regex_constants::match_any); +} +#endif +inline bool regex_search(const std::string& s, + const regex& e, + match_flag_type flags = match_default) +{ + smatch m; + return regex_search(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} +#if !defined(BOOST_NO_WREGEX) +inline bool regex_search(const std::basic_string& s, + const wregex& e, + match_flag_type flags = match_default) +{ + wsmatch m; + return regex_search(s.begin(), s.end(), m, e, flags | regex_constants::match_any); +} + +#endif // BOOST_NO_WREGEX + +#endif // partial overload + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_V4_REGEX_SEARCH_HPP + + diff --git a/cutl/details/boost/regex/v4/regex_split.hpp b/cutl/details/boost/regex/v4/regex_split.hpp new file mode 100644 index 0000000..c90e0d4 --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_split.hpp @@ -0,0 +1,172 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_split.hpp + * VERSION see + * DESCRIPTION: Implements regex_split and associated functions. + * Note this is an internal header file included + * by regex.hpp, do not include on its own. + */ + +#ifndef BOOST_REGEX_SPLIT_HPP +#define BOOST_REGEX_SPLIT_HPP + +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace re_detail{ + +template +const basic_regex& get_default_expression(charT) +{ + static const charT expression_text[4] = { '\\', 's', '+', '\00', }; + static const basic_regex e(expression_text); + return e; +} + +template +class split_pred +{ + typedef std::basic_string string_type; + typedef typename string_type::const_iterator iterator_type; + iterator_type* p_last; + OutputIterator* p_out; + std::size_t* p_max; + std::size_t initial_max; +public: + split_pred(iterator_type* a, OutputIterator* b, std::size_t* c) + : p_last(a), p_out(b), p_max(c), initial_max(*c) {} + + bool operator()(const match_results& what); +}; + +template +bool split_pred::operator() + (const match_results& what) +{ + *p_last = what[0].second; + if(what.size() > 1) + { + // output sub-expressions only: + for(unsigned i = 1; i < what.size(); ++i) + { + *(*p_out) = what.str(i); + ++(*p_out); + if(0 == --*p_max) return false; + } + return *p_max != 0; + } + else + { + // output $` only if it's not-null or not at the start of the input: + const sub_match& sub = what[-1]; + if((sub.first != sub.second) || (*p_max != initial_max)) + { + *(*p_out) = sub.str(); + ++(*p_out); + return --*p_max; + } + } + // + // initial null, do nothing: + return true; +} + +} // namespace re_detail + +template +std::size_t regex_split(OutputIterator out, + std::basic_string& s, + const basic_regex& e, + match_flag_type flags, + std::size_t max_split) +{ + typedef typename std::basic_string::const_iterator ci_t; + typedef typename match_results::allocator_type match_allocator; + ci_t last = s.begin(); + std::size_t init_size = max_split; + re_detail::split_pred pred(&last, &out, &max_split); + ci_t i, j; + i = s.begin(); + j = s.end(); + regex_grep(pred, i, j, e, flags); + // + // if there is still input left, do a final push as long as max_split + // is not exhausted, and we're not splitting sub-expressions rather + // than whitespace: + if(max_split && (last != s.end()) && (e.mark_count() == 1)) + { + *out = std::basic_string((ci_t)last, (ci_t)s.end()); + ++out; + last = s.end(); + --max_split; + } + // + // delete from the string everything that has been processed so far: + s.erase(0, last - s.begin()); + // + // return the number of new records pushed: + return init_size - max_split; +} + +template +inline std::size_t regex_split(OutputIterator out, + std::basic_string& s, + const basic_regex& e, + match_flag_type flags = match_default) +{ + return regex_split(out, s, e, flags, UINT_MAX); +} + +template +inline std::size_t regex_split(OutputIterator out, + std::basic_string& s) +{ + return regex_split(out, s, re_detail::get_default_expression(charT(0)), match_default, UINT_MAX); +} + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#endif + + diff --git a/cutl/details/boost/regex/v4/regex_token_iterator.hpp b/cutl/details/boost/regex/v4/regex_token_iterator.hpp new file mode 100644 index 0000000..608862e --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_token_iterator.hpp @@ -0,0 +1,342 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_token_iterator.hpp + * VERSION see + * DESCRIPTION: Provides regex_token_iterator implementation. + */ + +#ifndef BOOST_REGEX_V4_REGEX_TOKEN_ITERATOR_HPP +#define BOOST_REGEX_V4_REGEX_TOKEN_ITERATOR_HPP + +#include +#include +#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\ + || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \ + || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) +// +// Borland C++ Builder 6, and Visual C++ 6, +// can't cope with the array template constructor +// so we have a template member that will accept any type as +// argument, and then assert that is really is an array: +// +#include +#include +#endif + +namespace cutl_details_boost{ + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +#if BOOST_WORKAROUND(BOOST_MSVC, > 1300) +# pragma warning(push) +# pragma warning(disable:4700) +#endif + +template +class regex_token_iterator_implementation +{ + typedef basic_regex regex_type; + typedef sub_match value_type; + + match_results what; // current match + BidirectionalIterator base; // start of search area + BidirectionalIterator end; // end of search area + const regex_type re; // the expression + match_flag_type flags; // match flags + value_type result; // the current string result + int N; // the current sub-expression being enumerated + std::vector subs; // the sub-expressions to enumerate + +public: + regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, int sub, match_flag_type f) + : end(last), re(*p), flags(f){ subs.push_back(sub); } + regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const std::vector& v, match_flag_type f) + : end(last), re(*p), flags(f), subs(v){} +#if !BOOST_WORKAROUND(__HP_aCC, < 60700) +#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\ + || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \ + || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \ + || BOOST_WORKAROUND(__HP_aCC, < 60700) + template + regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const T& submatches, match_flag_type f) + : end(last), re(*p), flags(f) + { + // assert that T really is an array: + BOOST_STATIC_ASSERT(::cutl_details_boost::is_array::value); + const std::size_t array_size = sizeof(T) / sizeof(submatches[0]); + for(std::size_t i = 0; i < array_size; ++i) + { + subs.push_back(submatches[i]); + } + } +#else + template + regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const int (&submatches)[CN], match_flag_type f) + : end(last), re(*p), flags(f) + { + for(std::size_t i = 0; i < CN; ++i) + { + subs.push_back(submatches[i]); + } + } +#endif +#endif + bool init(BidirectionalIterator first) + { + N = 0; + base = first; + if(regex_search(first, end, what, re, flags, base) == true) + { + N = 0; + result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]); + return true; + } + else if((subs[N] == -1) && (first != end)) + { + result.first = first; + result.second = end; + result.matched = (first != end); + N = -1; + return true; + } + return false; + } + bool compare(const regex_token_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) + && (end == that.end) + && (flags == that.flags) + && (N == that.N) + && (what[0].first == that.what[0].first) + && (what[0].second == that.what[0].second); + } + const value_type& get() + { return result; } + bool next() + { + if(N == -1) + return false; + if(N+1 < (int)subs.size()) + { + ++N; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + //if(what.prefix().first != what[0].second) + // flags |= /*match_prev_avail |*/ regex_constants::match_not_bob; + BidirectionalIterator last_end(what[0].second); + if(regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags), base)) + { + N =0; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + else if((last_end != end) && (subs[0] == -1)) + { + N =-1; + result.first = last_end; + result.second = end; + result.matched = (last_end != end); + return true; + } + return false; + } +private: + regex_token_iterator_implementation& operator=(const regex_token_iterator_implementation&); +}; + +template ::value_type, + class traits = regex_traits > +class regex_token_iterator +#ifndef BOOST_NO_STD_ITERATOR + : public std::iterator< + std::forward_iterator_tag, + sub_match, + typename re_detail::regex_iterator_traits::difference_type, + const sub_match*, + const sub_match& > +#endif +{ +private: + typedef regex_token_iterator_implementation impl; + typedef shared_ptr pimpl; +public: + typedef basic_regex regex_type; + typedef sub_match value_type; + typedef typename re_detail::regex_iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + regex_token_iterator(){} + regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + int submatch = 0, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatch, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const std::vector& submatches, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } +#if !BOOST_WORKAROUND(__HP_aCC, < 60700) +#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\ + || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \ + || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \ + || BOOST_WORKAROUND(__HP_aCC, < 60700) + template + regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const T& submatches, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } +#else + template + regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const int (&submatches)[N], match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } +#endif +#endif + regex_token_iterator(const regex_token_iterator& that) + : pdata(that.pdata) {} + regex_token_iterator& operator=(const regex_token_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const regex_token_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const regex_token_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + regex_token_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + regex_token_iterator operator++(int) + { + regex_token_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && !pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef regex_token_iterator cregex_token_iterator; +typedef regex_token_iterator sregex_token_iterator; +#ifndef BOOST_NO_WREGEX +typedef regex_token_iterator wcregex_token_iterator; +typedef regex_token_iterator wsregex_token_iterator; +#endif + +template +inline regex_token_iterator make_regex_token_iterator(const charT* p, const basic_regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator(p, p+traits::length(p), e, submatch, m); +} +template +inline regex_token_iterator::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string& p, const basic_regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m); +} +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300) +template +inline regex_token_iterator make_regex_token_iterator(const charT* p, const basic_regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator(p, p+traits::length(p), e, submatch, m); +} +template +inline regex_token_iterator::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string& p, const basic_regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m); +} +#endif +template +inline regex_token_iterator make_regex_token_iterator(const charT* p, const basic_regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator(p, p+traits::length(p), e, submatch, m); +} +template +inline regex_token_iterator::const_iterator, charT, traits> make_regex_token_iterator(const std::basic_string& p, const basic_regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return regex_token_iterator::const_iterator, charT, traits>(p.begin(), p.end(), e, submatch, m); +} + +#if BOOST_WORKAROUND(BOOST_MSVC, > 1300) +# pragma warning(pop) +#endif +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_V4_REGEX_TOKEN_ITERATOR_HPP + + + + diff --git a/cutl/details/boost/regex/v4/regex_traits.hpp b/cutl/details/boost/regex/v4/regex_traits.hpp new file mode 100644 index 0000000..b632864 --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_traits.hpp @@ -0,0 +1,189 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits classes. + */ + +#ifndef BOOST_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_REGEX_TRAITS_HPP_INCLUDED + +#ifndef BOOST_REGEX_CONFIG_HPP +#include +#endif +#ifndef BOOST_REGEX_WORKAROUND_HPP +#include +#endif +#ifndef BOOST_REGEX_SYNTAX_TYPE_HPP +#include +#endif +#ifndef BOOST_REGEX_ERROR_TYPE_HPP +#include +#endif +#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED +#include +#endif +#ifndef BOOST_NO_STD_LOCALE +# ifndef BOOST_CPP_REGEX_TRAITS_HPP_INCLUDED +# include +# endif +#endif +#if !BOOST_WORKAROUND(__BORLANDC__, < 0x560) +# ifndef BOOST_C_REGEX_TRAITS_HPP_INCLUDED +# include +# endif +#endif +#if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) +# ifndef BOOST_W32_REGEX_TRAITS_HPP_INCLUDED +# include +# endif +#endif +#ifndef BOOST_REGEX_FWD_HPP_INCLUDED +#include +#endif + +#include "cutl/details/boost/mpl/has_xxx.hpp" +#include + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ + +template +struct regex_traits : public implementationT +{ + regex_traits() : implementationT() {} +}; + +// +// class regex_traits_wrapper. +// this is what our implementation will actually store; +// it provides default implementations of the "optional" +// interfaces that we support, in addition to the +// required "standard" ones: +// +namespace re_detail{ +#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) && !BOOST_WORKAROUND(__HP_aCC, < 60000) +BOOST_MPL_HAS_XXX_TRAIT_DEF(boost_extensions_tag) +#else +template +struct has_boost_extensions_tag +{ + BOOST_STATIC_CONSTANT(bool, value = false); +}; +#endif + +template +struct default_wrapper : public BaseT +{ + typedef typename BaseT::char_type char_type; + std::string error_string(::cutl_details_boost::regex_constants::error_type e)const + { + return ::cutl_details_boost::re_detail::get_default_error_string(e); + } + ::cutl_details_boost::regex_constants::syntax_type syntax_type(char_type c)const + { + return ((c & 0x7f) == c) ? get_default_syntax_type(static_cast(c)) : ::cutl_details_boost::regex_constants::syntax_char; + } + ::cutl_details_boost::regex_constants::escape_syntax_type escape_syntax_type(char_type c)const + { + return ((c & 0x7f) == c) ? get_default_escape_syntax_type(static_cast(c)) : ::cutl_details_boost::regex_constants::escape_type_identity; + } + int toi(const char_type*& p1, const char_type* p2, int radix)const + { + return ::cutl_details_boost::re_detail::global_toi(p1, p2, radix, *this); + } + char_type translate(char_type c, bool icase)const + { + return (icase ? this->translate_nocase(c) : this->translate(c)); + } + char_type translate(char_type c)const + { + return BaseT::translate(c); + } + char_type tolower(char_type c)const + { + return ::cutl_details_boost::re_detail::global_lower(c); + } + char_type toupper(char_type c)const + { + return ::cutl_details_boost::re_detail::global_upper(c); + } +}; + +template +struct compute_wrapper_base +{ + typedef BaseT type; +}; +#if !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) && !BOOST_WORKAROUND(__HP_aCC, < 60000) +template +struct compute_wrapper_base +{ + typedef default_wrapper type; +}; +#else +template <> +struct compute_wrapper_base, false> +{ + typedef default_wrapper > type; +}; +#ifndef BOOST_NO_WREGEX +template <> +struct compute_wrapper_base, false> +{ + typedef default_wrapper > type; +}; +#endif +#endif + +} // namespace re_detail + +template +struct regex_traits_wrapper + : public ::cutl_details_boost::re_detail::compute_wrapper_base< + BaseT, + ::cutl_details_boost::re_detail::has_boost_extensions_tag::value + >::type +{ + regex_traits_wrapper(){} +private: + regex_traits_wrapper(const regex_traits_wrapper&); + regex_traits_wrapper& operator=(const regex_traits_wrapper&); +}; + +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif // include + diff --git a/cutl/details/boost/regex/v4/regex_traits_defaults.hpp b/cutl/details/boost/regex/v4/regex_traits_defaults.hpp new file mode 100644 index 0000000..457d0ff --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_traits_defaults.hpp @@ -0,0 +1,371 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_traits_defaults.hpp + * VERSION see + * DESCRIPTION: Declares API's for access to regex_traits default properties. + */ + +#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED +#define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifndef BOOST_REGEX_SYNTAX_TYPE_HPP +#include +#endif +#ifndef BOOST_REGEX_ERROR_TYPE_HPP +#include +#endif + +#ifdef BOOST_NO_STDC_NAMESPACE +namespace std{ + using ::strlen; +} +#endif + +namespace cutl_details_boost{ namespace re_detail{ + + +// +// helpers to suppress warnings: +// +template +inline bool is_extended(charT c) +{ return c > 256; } +inline bool is_extended(char) +{ return false; } + + +BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n); +BOOST_REGEX_DECL const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n); +BOOST_REGEX_DECL regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c); +BOOST_REGEX_DECL regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c); + +// is charT c a combining character? +BOOST_REGEX_DECL bool BOOST_REGEX_CALL is_combining_implementation(uint_least16_t s); + +template +inline bool is_combining(charT c) +{ + return (c <= static_cast(0)) ? false : ((c >= static_cast((std::numeric_limits::max)())) ? false : is_combining_implementation(static_cast(c))); +} +template <> +inline bool is_combining(char) +{ + return false; +} +template <> +inline bool is_combining(signed char) +{ + return false; +} +template <> +inline bool is_combining(unsigned char) +{ + return false; +} +#if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives +#ifdef _MSC_VER +template<> +inline bool is_combining(wchar_t c) +{ + return is_combining_implementation(static_cast(c)); +} +#elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) +#if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX) +template<> +inline bool is_combining(wchar_t c) +{ + return is_combining_implementation(static_cast(c)); +} +#else +template<> +inline bool is_combining(wchar_t c) +{ + return (c >= (std::numeric_limits::max)()) ? false : is_combining_implementation(static_cast(c)); +} +#endif +#endif +#endif + +// +// is a charT c a line separator? +// +template +inline bool is_separator(charT c) +{ + return BOOST_REGEX_MAKE_BOOL( + (c == static_cast('\n')) + || (c == static_cast('\r')) + || (c == static_cast('\f')) + || (static_cast(c) == 0x2028u) + || (static_cast(c) == 0x2029u) + || (static_cast(c) == 0x85u)); +} +template <> +inline bool is_separator(char c) +{ + return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f')); +} + +// +// get a default collating element: +// +BOOST_REGEX_DECL std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name); + +// +// get the state_id of a character clasification, the individual +// traits classes then transform that state_id into a bitmask: +// +template +struct character_pointer_range +{ + const charT* p1; + const charT* p2; + + bool operator < (const character_pointer_range& r)const + { + return std::lexicographical_compare(p1, p2, r.p1, r.p2); + } + bool operator == (const character_pointer_range& r)const + { + // Not only do we check that the ranges are of equal size before + // calling std::equal, but there is no other algorithm available: + // not even a non-standard MS one. So forward to unchecked_equal + // in the MS case. + return ((p2 - p1) == (r.p2 - r.p1)) && re_detail::equal(p1, p2, r.p1); + } +}; +template +int get_default_class_id(const charT* p1, const charT* p2) +{ + static const charT data[73] = { + 'a', 'l', 'n', 'u', 'm', + 'a', 'l', 'p', 'h', 'a', + 'b', 'l', 'a', 'n', 'k', + 'c', 'n', 't', 'r', 'l', + 'd', 'i', 'g', 'i', 't', + 'g', 'r', 'a', 'p', 'h', + 'l', 'o', 'w', 'e', 'r', + 'p', 'r', 'i', 'n', 't', + 'p', 'u', 'n', 'c', 't', + 's', 'p', 'a', 'c', 'e', + 'u', 'n', 'i', 'c', 'o', 'd', 'e', + 'u', 'p', 'p', 'e', 'r', + 'v', + 'w', 'o', 'r', 'd', + 'x', 'd', 'i', 'g', 'i', 't', + }; + + static const character_pointer_range ranges[21] = + { + {data+0, data+5,}, // alnum + {data+5, data+10,}, // alpha + {data+10, data+15,}, // blank + {data+15, data+20,}, // cntrl + {data+20, data+21,}, // d + {data+20, data+25,}, // digit + {data+25, data+30,}, // graph + {data+29, data+30,}, // h + {data+30, data+31,}, // l + {data+30, data+35,}, // lower + {data+35, data+40,}, // print + {data+40, data+45,}, // punct + {data+45, data+46,}, // s + {data+45, data+50,}, // space + {data+57, data+58,}, // u + {data+50, data+57,}, // unicode + {data+57, data+62,}, // upper + {data+62, data+63,}, // v + {data+63, data+64,}, // w + {data+63, data+67,}, // word + {data+67, data+73,}, // xdigit + }; + static const character_pointer_range* ranges_begin = ranges; + static const character_pointer_range* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0])); + + character_pointer_range t = { p1, p2, }; + const character_pointer_range* p = std::lower_bound(ranges_begin, ranges_end, t); + if((p != ranges_end) && (t == *p)) + return static_cast(p - ranges); + return -1; +} + +// +// helper functions: +// +template +std::ptrdiff_t global_length(const charT* p) +{ + std::ptrdiff_t n = 0; + while(*p) + { + ++p; + ++n; + } + return n; +} +template<> +inline std::ptrdiff_t global_length(const char* p) +{ + return (std::strlen)(p); +} +#ifndef BOOST_NO_WREGEX +template<> +inline std::ptrdiff_t global_length(const wchar_t* p) +{ + return (std::wcslen)(p); +} +#endif +template +inline charT BOOST_REGEX_CALL global_lower(charT c) +{ + return c; +} +template +inline charT BOOST_REGEX_CALL global_upper(charT c) +{ + return c; +} + +BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_lower(char c); +BOOST_REGEX_DECL char BOOST_REGEX_CALL do_global_upper(char c); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c); +BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c); +#endif +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_lower(unsigned short c); +BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL do_global_upper(unsigned short c); +#endif +// +// This sucks: declare template specialisations of global_lower/global_upper +// that just forward to the non-template implementation functions. We do +// this because there is one compiler (Compaq Tru64 C++) that doesn't seem +// to differentiate between templates and non-template overloads.... +// what's more, the primary template, plus all overloads have to be +// defined in the same translation unit (if one is inline they all must be) +// otherwise the "local template instantiation" compiler option can pick +// the wrong instantiation when linking: +// +template<> inline char BOOST_REGEX_CALL global_lower(char c){ return do_global_lower(c); } +template<> inline char BOOST_REGEX_CALL global_upper(char c){ return do_global_upper(c); } +#ifndef BOOST_NO_WREGEX +template<> inline wchar_t BOOST_REGEX_CALL global_lower(wchar_t c){ return do_global_lower(c); } +template<> inline wchar_t BOOST_REGEX_CALL global_upper(wchar_t c){ return do_global_upper(c); } +#endif +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +template<> inline unsigned short BOOST_REGEX_CALL global_lower(unsigned short c){ return do_global_lower(c); } +template<> inline unsigned short BOOST_REGEX_CALL global_upper(unsigned short c){ return do_global_upper(c); } +#endif + +template +int global_value(charT c) +{ + static const charT zero = '0'; + static const charT nine = '9'; + static const charT a = 'a'; + static const charT f = 'f'; + static const charT A = 'A'; + static const charT F = 'F'; + + if(c > f) return -1; + if(c >= a) return 10 + (c - a); + if(c > F) return -1; + if(c >= A) return 10 + (c - A); + if(c > nine) return -1; + if(c >= zero) return c - zero; + return -1; +} +template +int global_toi(const charT*& p1, const charT* p2, int radix, const traits& t) +{ + (void)t; // warning suppression + int next_value = t.value(*p1, radix); + if((p1 == p2) || (next_value < 0) || (next_value >= radix)) + return -1; + int result = 0; + while(p1 != p2) + { + next_value = t.value(*p1, radix); + if((next_value < 0) || (next_value >= radix)) + break; + result *= radix; + result += next_value; + ++p1; + } + return result; +} + +template +inline const charT* get_escape_R_string() +{ +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable:4309 4245) +#endif + static const charT e1[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', '\x85', '\\', 'x', '{', '2', '0', '2', '8', '}', + '\\', 'x', '{', '2', '0', '2', '9', '}', ']', ')', '\0' }; + static const charT e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; + + charT c = static_cast(0x2029u); + bool b = (static_cast(c) == 0x2029u); + + return (b ? e1 : e2); +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif +} + +template <> +inline const char* get_escape_R_string() +{ +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable:4309) +#endif + static const char e2[] = { '(', '?', '>', '\x0D', '\x0A', '?', + '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', '\0' }; + return e2; +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif +} + +} // re_detail +} // boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif diff --git a/cutl/details/boost/regex/v4/regex_workaround.hpp b/cutl/details/boost/regex/v4/regex_workaround.hpp new file mode 100644 index 0000000..496ab8c --- /dev/null +++ b/cutl/details/boost/regex/v4/regex_workaround.hpp @@ -0,0 +1,232 @@ +/* + * + * Copyright (c) 1998-2005 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE regex_workarounds.cpp + * VERSION see + * DESCRIPTION: Declares Misc workarounds. + */ + +#ifndef BOOST_REGEX_WORKAROUND_HPP +#define BOOST_REGEX_WORKAROUND_HPP + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef BOOST_NO_STD_LOCALE +# include +#endif + +#if defined(BOOST_NO_STDC_NAMESPACE) +namespace std{ + using ::sprintf; using ::strcpy; using ::strcat; using ::strlen; +} +#endif + +namespace cutl_details_boost{ namespace re_detail{ +#ifdef BOOST_NO_STD_DISTANCE +template +std::ptrdiff_t distance(const T& x, const T& y) +{ return y - x; } +#else +using std::distance; +#endif +}} + + +#ifdef BOOST_REGEX_NO_BOOL +# define BOOST_REGEX_MAKE_BOOL(x) static_cast((x) ? true : false) +#else +# define BOOST_REGEX_MAKE_BOOL(x) static_cast(x) +#endif + +/***************************************************************************** + * + * Fix broken broken namespace support: + * + ****************************************************************************/ + +#if defined(BOOST_NO_STDC_NAMESPACE) && defined(__cplusplus) + +namespace std{ + using ::ptrdiff_t; + using ::size_t; + using ::abs; + using ::memset; + using ::memcpy; +} + +#endif + +/***************************************************************************** + * + * helper functions pointer_construct/pointer_destroy: + * + ****************************************************************************/ + +#ifdef __cplusplus +namespace cutl_details_boost{ namespace re_detail{ + +#ifdef BOOST_MSVC +#pragma warning (push) +#pragma warning (disable : 4100) +#endif + +template +inline void pointer_destroy(T* p) +{ p->~T(); (void)p; } + +#ifdef BOOST_MSVC +#pragma warning (pop) +#endif + +template +inline void pointer_construct(T* p, const T& t) +{ new (p) T(t); } + +}} // namespaces +#endif + +/***************************************************************************** + * + * helper function copy: + * + ****************************************************************************/ + +#ifdef __cplusplus +namespace cutl_details_boost{ namespace re_detail{ +#if BOOST_WORKAROUND(BOOST_MSVC,>=1400) && BOOST_WORKAROUND(BOOST_MSVC, <1600) && defined(_CPPLIB_VER) && defined(BOOST_DINKUMWARE_STDLIB) && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION)) + // + // MSVC 8 will either emit warnings or else refuse to compile + // code that makes perfectly legitimate use of std::copy, when + // the OutputIterator type is a user-defined class (apparently all user + // defined iterators are "unsafe"). This code works around that: + // + template + inline OutputIterator copy( + InputIterator first, + InputIterator last, + OutputIterator dest + ) + { + return stdext::unchecked_copy(first, last, dest); + } + template + inline bool equal( + InputIterator1 first, + InputIterator1 last, + InputIterator2 with + ) + { + return stdext::unchecked_equal(first, last, with); + } +#elif BOOST_WORKAROUND(BOOST_MSVC, > 1500) + // + // MSVC 10 will either emit warnings or else refuse to compile + // code that makes perfectly legitimate use of std::copy, when + // the OutputIterator type is a user-defined class (apparently all user + // defined iterators are "unsafe"). What's more Microsoft have removed their + // non-standard "unchecked" versions, even though their still in the MS + // documentation!! Work around this as best we can: + // + template + inline OutputIterator copy( + InputIterator first, + InputIterator last, + OutputIterator dest + ) + { + while(first != last) + *dest++ = *first++; + return dest; + } + template + inline bool equal( + InputIterator1 first, + InputIterator1 last, + InputIterator2 with + ) + { + while(first != last) + if(*first++ != *with++) return false; + return true; + } +#else + using std::copy; + using std::equal; +#endif +#if BOOST_WORKAROUND(BOOST_MSVC,>=1400) && defined(__STDC_WANT_SECURE_LIB__) && __STDC_WANT_SECURE_LIB__ + + // use safe versions of strcpy etc: + using ::strcpy_s; + using ::strcat_s; +#else + inline std::size_t strcpy_s( + char *strDestination, + std::size_t sizeInBytes, + const char *strSource + ) + { + if(std::strlen(strSource)+1 > sizeInBytes) + return 1; + std::strcpy(strDestination, strSource); + return 0; + } + inline std::size_t strcat_s( + char *strDestination, + std::size_t sizeInBytes, + const char *strSource + ) + { + if(std::strlen(strSource) + std::strlen(strDestination) + 1 > sizeInBytes) + return 1; + std::strcat(strDestination, strSource); + return 0; + } + +#endif + + inline void overflow_error_if_not_zero(std::size_t i) + { + if(i) + { + std::overflow_error e("String buffer too small"); + cutl_details_boost::throw_exception(e); + } + } + +}} // namespaces + +#endif // __cplusplus + +#endif // include guard + diff --git a/cutl/details/boost/regex/v4/states.hpp b/cutl/details/boost/regex/v4/states.hpp new file mode 100644 index 0000000..8f1152a --- /dev/null +++ b/cutl/details/boost/regex/v4/states.hpp @@ -0,0 +1,301 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE states.cpp + * VERSION see + * DESCRIPTION: Declares internal state machine structures. + */ + +#ifndef BOOST_REGEX_V4_STATES_HPP +#define BOOST_REGEX_V4_STATES_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +/*** mask_type ******************************************************* +Whenever we have a choice of two alternatives, we use an array of bytes +to indicate which of the two alternatives it is possible to take for any +given input character. If mask_take is set, then we can take the next +state, and if mask_skip is set then we can take the alternative. +***********************************************************************/ +enum mask_type +{ + mask_take = 1, + mask_skip = 2, + mask_init = 4, + mask_any = mask_skip | mask_take, + mask_all = mask_any +}; + +/*** helpers ********************************************************** +These helpers let us use function overload resolution to detect whether +we have narrow or wide character strings: +***********************************************************************/ +struct _narrow_type{}; +struct _wide_type{}; +template struct is_byte; +template<> struct is_byte { typedef _narrow_type width_type; }; +template<> struct is_byte{ typedef _narrow_type width_type; }; +template<> struct is_byte { typedef _narrow_type width_type; }; +template struct is_byte { typedef _wide_type width_type; }; + +/*** enum syntax_element_type ****************************************** +Every record in the state machine falls into one of the following types: +***********************************************************************/ +enum syntax_element_type +{ + // start of a marked sub-expression, or perl-style (?...) extension + syntax_element_startmark = 0, + // end of a marked sub-expression, or perl-style (?...) extension + syntax_element_endmark = syntax_element_startmark + 1, + // any sequence of literal characters + syntax_element_literal = syntax_element_endmark + 1, + // start of line assertion: ^ + syntax_element_start_line = syntax_element_literal + 1, + // end of line assertion $ + syntax_element_end_line = syntax_element_start_line + 1, + // match any character: . + syntax_element_wild = syntax_element_end_line + 1, + // end of expression: we have a match when we get here + syntax_element_match = syntax_element_wild + 1, + // perl style word boundary: \b + syntax_element_word_boundary = syntax_element_match + 1, + // perl style within word boundary: \B + syntax_element_within_word = syntax_element_word_boundary + 1, + // start of word assertion: \< + syntax_element_word_start = syntax_element_within_word + 1, + // end of word assertion: \> + syntax_element_word_end = syntax_element_word_start + 1, + // start of buffer assertion: \` + syntax_element_buffer_start = syntax_element_word_end + 1, + // end of buffer assertion: \' + syntax_element_buffer_end = syntax_element_buffer_start + 1, + // backreference to previously matched sub-expression + syntax_element_backref = syntax_element_buffer_end + 1, + // either a wide character set [..] or one with multicharacter collating elements: + syntax_element_long_set = syntax_element_backref + 1, + // narrow character set: [...] + syntax_element_set = syntax_element_long_set + 1, + // jump to a new state in the machine: + syntax_element_jump = syntax_element_set + 1, + // choose between two production states: + syntax_element_alt = syntax_element_jump + 1, + // a repeat + syntax_element_rep = syntax_element_alt + 1, + // match a combining character sequence + syntax_element_combining = syntax_element_rep + 1, + // perl style soft buffer end: \z + syntax_element_soft_buffer_end = syntax_element_combining + 1, + // perl style continuation: \G + syntax_element_restart_continue = syntax_element_soft_buffer_end + 1, + // single character repeats: + syntax_element_dot_rep = syntax_element_restart_continue + 1, + syntax_element_char_rep = syntax_element_dot_rep + 1, + syntax_element_short_set_rep = syntax_element_char_rep + 1, + syntax_element_long_set_rep = syntax_element_short_set_rep + 1, + // a backstep for lookbehind repeats: + syntax_element_backstep = syntax_element_long_set_rep + 1, + // an assertion that a mark was matched: + syntax_element_assert_backref = syntax_element_backstep + 1, + syntax_element_toggle_case = syntax_element_assert_backref + 1, + // a recursive expression: + syntax_element_recurse = syntax_element_toggle_case + 1 +}; + +#ifdef BOOST_REGEX_DEBUG +// dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion +std::ostream& operator<<(std::ostream&, syntax_element_type); +#endif + +struct re_syntax_base; + +/*** union offset_type ************************************************ +Points to another state in the machine. During machine construction +we use integral offsets, but these are converted to pointers before +execution of the machine. +***********************************************************************/ +union offset_type +{ + re_syntax_base* p; + std::ptrdiff_t i; +}; + +/*** struct re_syntax_base ******************************************** +Base class for all states in the machine. +***********************************************************************/ +struct re_syntax_base +{ + syntax_element_type type; // what kind of state this is + offset_type next; // next state in the machine +}; + +/*** struct re_brace ************************************************** +A marked parenthesis. +***********************************************************************/ +struct re_brace : public re_syntax_base +{ + // The index to match, can be zero (don't mark the sub-expression) + // or negative (for perl style (?...) extentions): + int index; + bool icase; +}; + +/*** struct re_dot ************************************************** +Match anything. +***********************************************************************/ +enum +{ + dont_care = 1, + force_not_newline = 0, + force_newline = 2, + + test_not_newline = 2, + test_newline = 3 +}; +struct re_dot : public re_syntax_base +{ + unsigned char mask; +}; + +/*** struct re_literal ************************************************ +A string of literals, following this structure will be an +array of characters: charT[length] +***********************************************************************/ +struct re_literal : public re_syntax_base +{ + unsigned int length; +}; + +/*** struct re_case ************************************************ +Indicates whether we are moving to a case insensive block or not +***********************************************************************/ +struct re_case : public re_syntax_base +{ + bool icase; +}; + +/*** struct re_set_long *********************************************** +A wide character set of characters, following this structure will be +an array of type charT: +First csingles null-terminated strings +Then 2 * cranges NULL terminated strings +Then cequivalents NULL terminated strings +***********************************************************************/ +template +struct re_set_long : public re_syntax_base +{ + unsigned int csingles, cranges, cequivalents; + mask_type cclasses; + mask_type cnclasses; + bool isnot; + bool singleton; +}; + +/*** struct re_set **************************************************** +A set of narrow-characters, matches any of _map which is none-zero +***********************************************************************/ +struct re_set : public re_syntax_base +{ + unsigned char _map[1 << CHAR_BIT]; +}; + +/*** struct re_jump *************************************************** +Jump to a new location in the machine (not next). +***********************************************************************/ +struct re_jump : public re_syntax_base +{ + offset_type alt; // location to jump to +}; + +/*** struct re_alt *************************************************** +Jump to a new location in the machine (possibly next). +***********************************************************************/ +struct re_alt : public re_jump +{ + unsigned char _map[1 << CHAR_BIT]; // which characters can take the jump + unsigned int can_be_null; // true if we match a NULL string +}; + +/*** struct re_repeat ************************************************* +Repeat a section of the machine +***********************************************************************/ +struct re_repeat : public re_alt +{ + std::size_t min, max; // min and max allowable repeats + int state_id; // Unique identifier for this repeat + bool leading; // True if this repeat is at the start of the machine (lets us optimize some searches) + bool greedy; // True if this is a greedy repeat +}; + +/*** struct re_recurse ************************************************ +Recurse to a particular subexpression. +**********************************************************************/ +struct re_recurse : public re_jump +{ + int state_id; // identifier of first nested repeat within the recursion. +}; + +/*** enum re_jump_size_type ******************************************* +Provides compiled size of re_jump structure (allowing for trailing alignment). +We provide this so we know how manybytes to insert when constructing the machine +(The value of padding_mask is defined in regex_raw_buffer.hpp). +***********************************************************************/ +enum re_jump_size_type +{ + re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask), + re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask), + re_alt_size = (sizeof(re_alt) + padding_mask) & ~(padding_mask) +}; + +/*** proc re_is_set_member ********************************************* +Forward declaration: we'll need this one later... +***********************************************************************/ + +template +struct regex_data; + +template +iterator BOOST_REGEX_CALL re_is_set_member(iterator next, + iterator last, + const re_set_long* set_, + const regex_data& e, bool icase); + +} // namespace re_detail + +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + + diff --git a/cutl/details/boost/regex/v4/sub_match.hpp b/cutl/details/boost/regex/v4/sub_match.hpp new file mode 100644 index 0000000..53f0b27 --- /dev/null +++ b/cutl/details/boost/regex/v4/sub_match.hpp @@ -0,0 +1,512 @@ +/* + * + * Copyright (c) 1998-2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE sub_match.cpp + * VERSION see + * DESCRIPTION: Declares template class sub_match. + */ + +#ifndef BOOST_REGEX_V4_SUB_MATCH_HPP +#define BOOST_REGEX_V4_SUB_MATCH_HPP + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +namespace cutl_details_boost{ + +template +struct sub_match : public std::pair +{ + typedef typename re_detail::regex_iterator_traits::value_type value_type; +#if defined(BOOST_NO_STD_ITERATOR_TRAITS) || defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) + typedef std::ptrdiff_t difference_type; +#else + typedef typename re_detail::regex_iterator_traits::difference_type difference_type; +#endif + typedef BidiIterator iterator_type; + typedef BidiIterator iterator; + typedef BidiIterator const_iterator; + + bool matched; + + sub_match() : std::pair(), matched(false) {} + sub_match(BidiIterator i) : std::pair(i, i), matched(false) {} +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1310)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)\ + && !BOOST_WORKAROUND(__DECCXX_VER, BOOST_TESTED_AT(60590042)) + template + operator std::basic_string ()const + { + return matched ? std::basic_string(this->first, this->second) : std::basic_string(); + } +#else + operator std::basic_string ()const + { + return str(); + } +#endif + difference_type BOOST_REGEX_CALL length()const + { + difference_type n = matched ? ::cutl_details_boost::re_detail::distance((BidiIterator)this->first, (BidiIterator)this->second) : 0; + return n; + } + std::basic_string str()const + { + std::basic_string result; + if(matched) + { + std::size_t len = ::cutl_details_boost::re_detail::distance((BidiIterator)this->first, (BidiIterator)this->second); + result.reserve(len); + BidiIterator i = this->first; + while(i != this->second) + { + result.append(1, *i); + ++i; + } + } + return result; + } + int compare(const sub_match& s)const + { + if(matched != s.matched) + return static_cast(matched) - static_cast(s.matched); + return str().compare(s.str()); + } + int compare(const std::basic_string& s)const + { + return str().compare(s); + } + int compare(const value_type* p)const + { + return str().compare(p); + } + + bool operator==(const sub_match& that)const + { return compare(that) == 0; } + bool BOOST_REGEX_CALL operator !=(const sub_match& that)const + { return compare(that) != 0; } + bool operator<(const sub_match& that)const + { return compare(that) < 0; } + bool operator>(const sub_match& that)const + { return compare(that) > 0; } + bool operator<=(const sub_match& that)const + { return compare(that) <= 0; } + bool operator>=(const sub_match& that)const + { return compare(that) >= 0; } + +#ifdef BOOST_REGEX_MATCH_EXTRA + typedef std::vector > capture_sequence_type; + + const capture_sequence_type& captures()const + { + if(!m_captures) + m_captures.reset(new capture_sequence_type()); + return *m_captures; + } + // + // Private implementation API: DO NOT USE! + // + capture_sequence_type& get_captures()const + { + if(!m_captures) + m_captures.reset(new capture_sequence_type()); + return *m_captures; + } + +private: + mutable cutl_details_boost::scoped_ptr m_captures; +public: + +#endif + sub_match(const sub_match& that, bool +#ifdef BOOST_REGEX_MATCH_EXTRA + deep_copy +#endif + = true + ) + : std::pair(that), + matched(that.matched) + { +#ifdef BOOST_REGEX_MATCH_EXTRA + if(that.m_captures) + if(deep_copy) + m_captures.reset(new capture_sequence_type(*(that.m_captures))); +#endif + } + sub_match& operator=(const sub_match& that) + { + this->first = that.first; + this->second = that.second; + matched = that.matched; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(that.m_captures) + get_captures() = *(that.m_captures); +#endif + return *this; + } + + +#ifdef BOOST_OLD_REGEX_H + // + // the following are deprecated, do not use!! + // + operator int()const; + operator unsigned int()const; + operator short()const + { + return (short)(int)(*this); + } + operator unsigned short()const + { + return (unsigned short)(unsigned int)(*this); + } +#endif +}; + +typedef sub_match csub_match; +typedef sub_match ssub_match; +#ifndef BOOST_NO_WREGEX +typedef sub_match wcsub_match; +typedef sub_match wssub_match; +#endif + +// comparison to std::basic_string<> part 1: +template +inline bool operator == (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) == 0; } +template +inline bool operator != (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) != 0; } +template +inline bool operator < (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) < 0; } +template +inline bool operator <= (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) <= 0; } +template +inline bool operator >= (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) >= 0; } +template +inline bool operator > (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ return s.compare(m.str()) > 0; } +// comparison to std::basic_string<> part 2: +template +inline bool operator == (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) == 0; } +template +inline bool operator != (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) != 0; } +template +inline bool operator < (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) < 0; } +template +inline bool operator > (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) > 0; } +template +inline bool operator <= (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) <= 0; } +template +inline bool operator >= (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ return m.str().compare(s) >= 0; } +// comparison to const charT* part 1: +template +inline bool operator == (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const* s) +{ return m.str().compare(s) == 0; } +template +inline bool operator != (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const* s) +{ return m.str().compare(s) != 0; } +template +inline bool operator > (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const* s) +{ return m.str().compare(s) > 0; } +template +inline bool operator < (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const* s) +{ return m.str().compare(s) < 0; } +template +inline bool operator >= (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const* s) +{ return m.str().compare(s) >= 0; } +template +inline bool operator <= (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const* s) +{ return m.str().compare(s) <= 0; } +// comparison to const charT* part 2: +template +inline bool operator == (typename re_detail::regex_iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) == 0; } +template +inline bool operator != (typename re_detail::regex_iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) != 0; } +template +inline bool operator < (typename re_detail::regex_iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) > 0; } +template +inline bool operator > (typename re_detail::regex_iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) < 0; } +template +inline bool operator <= (typename re_detail::regex_iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) >= 0; } +template +inline bool operator >= (typename re_detail::regex_iterator_traits::value_type const* s, + const sub_match& m) +{ return m.str().compare(s) <= 0; } + +// comparison to const charT& part 1: +template +inline bool operator == (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) == 0; } +template +inline bool operator != (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) != 0; } +template +inline bool operator > (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) > 0; } +template +inline bool operator < (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) < 0; } +template +inline bool operator >= (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) >= 0; } +template +inline bool operator <= (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ return m.str().compare(0, m.length(), &s, 1) <= 0; } +// comparison to const charT* part 2: +template +inline bool operator == (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) == 0; } +template +inline bool operator != (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) != 0; } +template +inline bool operator < (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) > 0; } +template +inline bool operator > (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) < 0; } +template +inline bool operator <= (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) >= 0; } +template +inline bool operator >= (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ return m.str().compare(0, m.length(), &s, 1) <= 0; } + +// addition operators: +template +inline std::basic_string::value_type, traits, Allocator> +operator + (const std::basic_string::value_type, traits, Allocator>& s, + const sub_match& m) +{ + std::basic_string::value_type, traits, Allocator> result; + result.reserve(s.size() + m.length() + 1); + return result.append(s).append(m.first, m.second); +} +template +inline std::basic_string::value_type, traits, Allocator> +operator + (const sub_match& m, + const std::basic_string::value_type, traits, Allocator>& s) +{ + std::basic_string::value_type, traits, Allocator> result; + result.reserve(s.size() + m.length() + 1); + return result.append(m.first, m.second).append(s); +} +#if !(defined(__GNUC__) && defined(BOOST_NO_STD_LOCALE)) +template +inline std::basic_string::value_type> +operator + (typename re_detail::regex_iterator_traits::value_type const* s, + const sub_match& m) +{ + std::basic_string::value_type> result; + result.reserve(std::char_traits::value_type>::length(s) + m.length() + 1); + return result.append(s).append(m.first, m.second); +} +template +inline std::basic_string::value_type> +operator + (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const * s) +{ + std::basic_string::value_type> result; + result.reserve(std::char_traits::value_type>::length(s) + m.length() + 1); + return result.append(m.first, m.second).append(s); +} +#else +// worwaround versions: +template +inline std::basic_string::value_type> +operator + (typename re_detail::regex_iterator_traits::value_type const* s, + const sub_match& m) +{ + return s + m.str(); +} +template +inline std::basic_string::value_type> +operator + (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const * s) +{ + return m.str() + s; +} +#endif +template +inline std::basic_string::value_type> +operator + (typename re_detail::regex_iterator_traits::value_type const& s, + const sub_match& m) +{ + std::basic_string::value_type> result; + result.reserve(m.length() + 2); + return result.append(1, s).append(m.first, m.second); +} +template +inline std::basic_string::value_type> +operator + (const sub_match& m, + typename re_detail::regex_iterator_traits::value_type const& s) +{ + std::basic_string::value_type> result; + result.reserve(m.length() + 2); + return result.append(m.first, m.second).append(1, s); +} +template +inline std::basic_string::value_type> +operator + (const sub_match& m1, + const sub_match& m2) +{ + std::basic_string::value_type> result; + result.reserve(m1.length() + m2.length() + 1); + return result.append(m1.first, m1.second).append(m2.first, m2.second); +} +#ifndef BOOST_NO_STD_LOCALE +template +std::basic_ostream& + operator << (std::basic_ostream& os, + const sub_match& s) +{ + return (os << s.str()); +} +#else +template +std::ostream& operator << (std::ostream& os, + const sub_match& s) +{ + return (os << s.str()); +} +#endif + +#ifdef BOOST_OLD_REGEX_H +namespace re_detail{ +template +int do_toi(BidiIterator i, BidiIterator j, char c, int radix) +{ + std::string s(i, j); + char* p; + int result = std::strtol(s.c_str(), &p, radix); + if(*p)raise_regex_exception("Bad sub-expression"); + return result; +} + +// +// helper: +template +int do_toi(I& i, I j, charT c) +{ + int result = 0; + while((i != j) && (isdigit(*i))) + { + result = result*10 + (*i - '0'); + ++i; + } + return result; +} +} + + +template +sub_match::operator int()const +{ + BidiIterator i = first; + BidiIterator j = second; + if(i == j)raise_regex_exception("Bad sub-expression"); + int neg = 1; + if((i != j) && (*i == '-')) + { + neg = -1; + ++i; + } + neg *= re_detail::do_toi(i, j, *i); + if(i != j)raise_regex_exception("Bad sub-expression"); + return neg; +} +template +sub_match::operator unsigned int()const +{ + BidiIterator i = first; + BidiIterator j = second; + if(i == j) + raise_regex_exception("Bad sub-expression"); + return re_detail::do_toi(i, j, *first); +} +#endif + +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + diff --git a/cutl/details/boost/regex/v4/syntax_type.hpp b/cutl/details/boost/regex/v4/syntax_type.hpp new file mode 100644 index 0000000..4794c32 --- /dev/null +++ b/cutl/details/boost/regex/v4/syntax_type.hpp @@ -0,0 +1,105 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE syntax_type.hpp + * VERSION see + * DESCRIPTION: Declares regular expression synatx type enumerator. + */ + +#ifndef BOOST_REGEX_SYNTAX_TYPE_HPP +#define BOOST_REGEX_SYNTAX_TYPE_HPP + +namespace cutl_details_boost{ +namespace regex_constants{ + +typedef unsigned char syntax_type; + +// +// values chosen are binary compatible with previous version: +// +static const syntax_type syntax_char = 0; +static const syntax_type syntax_open_mark = 1; +static const syntax_type syntax_close_mark = 2; +static const syntax_type syntax_dollar = 3; +static const syntax_type syntax_caret = 4; +static const syntax_type syntax_dot = 5; +static const syntax_type syntax_star = 6; +static const syntax_type syntax_plus = 7; +static const syntax_type syntax_question = 8; +static const syntax_type syntax_open_set = 9; +static const syntax_type syntax_close_set = 10; +static const syntax_type syntax_or = 11; +static const syntax_type syntax_escape = 12; +static const syntax_type syntax_dash = 14; +static const syntax_type syntax_open_brace = 15; +static const syntax_type syntax_close_brace = 16; +static const syntax_type syntax_digit = 17; +static const syntax_type syntax_comma = 27; +static const syntax_type syntax_equal = 37; +static const syntax_type syntax_colon = 36; +static const syntax_type syntax_not = 53; + +// extensions: + +static const syntax_type syntax_hash = 13; +static const syntax_type syntax_newline = 26; + +// escapes: + +typedef syntax_type escape_syntax_type; + +static const escape_syntax_type escape_type_word_assert = 18; +static const escape_syntax_type escape_type_not_word_assert = 19; +static const escape_syntax_type escape_type_control_f = 29; +static const escape_syntax_type escape_type_control_n = 30; +static const escape_syntax_type escape_type_control_r = 31; +static const escape_syntax_type escape_type_control_t = 32; +static const escape_syntax_type escape_type_control_v = 33; +static const escape_syntax_type escape_type_ascii_control = 35; +static const escape_syntax_type escape_type_hex = 34; +static const escape_syntax_type escape_type_unicode = 0; // not used +static const escape_syntax_type escape_type_identity = 0; // not used +static const escape_syntax_type escape_type_backref = syntax_digit; +static const escape_syntax_type escape_type_decimal = syntax_digit; // not used +static const escape_syntax_type escape_type_class = 22; +static const escape_syntax_type escape_type_not_class = 23; + +// extensions: + +static const escape_syntax_type escape_type_left_word = 20; +static const escape_syntax_type escape_type_right_word = 21; +static const escape_syntax_type escape_type_start_buffer = 24; // for \` +static const escape_syntax_type escape_type_end_buffer = 25; // for \' +static const escape_syntax_type escape_type_control_a = 28; // for \a +static const escape_syntax_type escape_type_e = 38; // for \e +static const escape_syntax_type escape_type_E = 47; // for \Q\E +static const escape_syntax_type escape_type_Q = 48; // for \Q\E +static const escape_syntax_type escape_type_X = 49; // for \X +static const escape_syntax_type escape_type_C = 50; // for \C +static const escape_syntax_type escape_type_Z = 51; // for \Z +static const escape_syntax_type escape_type_G = 52; // for \G + +static const escape_syntax_type escape_type_property = 54; // for \p +static const escape_syntax_type escape_type_not_property = 55; // for \P +static const escape_syntax_type escape_type_named_char = 56; // for \N +static const escape_syntax_type escape_type_extended_backref = 57; // for \g +static const escape_syntax_type escape_type_reset_start_mark = 58; // for \K +static const escape_syntax_type escape_type_line_ending = 59; // for \R + +static const escape_syntax_type syntax_max = 60; + +} +} + + +#endif diff --git a/cutl/details/boost/regex/v4/u32regex_iterator.hpp b/cutl/details/boost/regex/v4/u32regex_iterator.hpp new file mode 100644 index 0000000..4a3068b --- /dev/null +++ b/cutl/details/boost/regex/v4/u32regex_iterator.hpp @@ -0,0 +1,193 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE u32regex_iterator.hpp + * VERSION see + * DESCRIPTION: Provides u32regex_iterator implementation. + */ + +#ifndef BOOST_REGEX_V4_U32REGEX_ITERATOR_HPP +#define BOOST_REGEX_V4_U32REGEX_ITERATOR_HPP + +namespace cutl_details_boost{ + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +template +class u32regex_iterator_implementation +{ + typedef u32regex regex_type; + + match_results what; // current match + BidirectionalIterator base; // start of sequence + BidirectionalIterator end; // end of sequence + const regex_type re; // the expression + match_flag_type flags; // flags for matching + +public: + u32regex_iterator_implementation(const regex_type* p, BidirectionalIterator last, match_flag_type f) + : base(), end(last), re(*p), flags(f){} + bool init(BidirectionalIterator first) + { + base = first; + return u32regex_search(first, end, what, re, flags, base); + } + bool compare(const u32regex_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) && (end == that.end) && (flags == that.flags) && (what[0].first == that.what[0].first) && (what[0].second == that.what[0].second); + } + const match_results& get() + { return what; } + bool next() + { + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail; + BidirectionalIterator next_start = what[0].second; + match_flag_type f(flags); + if(!what.length()) + f |= regex_constants::match_not_initial_null; + //if(base != next_start) + // f |= regex_constants::match_not_bob; + bool result = u32regex_search(next_start, end, what, re, f, base); + if(result) + what.set_base(base); + return result; + } +private: + u32regex_iterator_implementation& operator=(const u32regex_iterator_implementation&); +}; + +template +class u32regex_iterator +#ifndef BOOST_NO_STD_ITERATOR + : public std::iterator< + std::forward_iterator_tag, + match_results, + typename re_detail::regex_iterator_traits::difference_type, + const match_results*, + const match_results& > +#endif +{ +private: + typedef u32regex_iterator_implementation impl; + typedef shared_ptr pimpl; +public: + typedef u32regex regex_type; + typedef match_results value_type; + typedef typename re_detail::regex_iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + u32regex_iterator(){} + u32regex_iterator(BidirectionalIterator a, BidirectionalIterator b, + const regex_type& re, + match_flag_type m = match_default) + : pdata(new impl(&re, b, m)) + { + if(!pdata->init(a)) + { + pdata.reset(); + } + } + u32regex_iterator(const u32regex_iterator& that) + : pdata(that.pdata) {} + u32regex_iterator& operator=(const u32regex_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const u32regex_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const u32regex_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + u32regex_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + u32regex_iterator operator++(int) + { + u32regex_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && !pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef u32regex_iterator utf8regex_iterator; +typedef u32regex_iterator utf16regex_iterator; +typedef u32regex_iterator utf32regex_iterator; + +inline u32regex_iterator make_u32regex_iterator(const char* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(p, p+std::strlen(p), e, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_iterator make_u32regex_iterator(const wchar_t* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(p, p+std::wcslen(p), e, m); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +inline u32regex_iterator make_u32regex_iterator(const UChar* p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(p, p+u_strlen(p), e, m); +} +#endif +template +inline u32regex_iterator::const_iterator> make_u32regex_iterator(const std::basic_string& p, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_iterator(p.begin(), p.end(), e, m); +} +inline u32regex_iterator make_u32regex_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, m); +} + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_V4_REGEX_ITERATOR_HPP + diff --git a/cutl/details/boost/regex/v4/u32regex_token_iterator.hpp b/cutl/details/boost/regex/v4/u32regex_token_iterator.hpp new file mode 100644 index 0000000..755f65e --- /dev/null +++ b/cutl/details/boost/regex/v4/u32regex_token_iterator.hpp @@ -0,0 +1,377 @@ +/* + * + * Copyright (c) 2003 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE u32regex_token_iterator.hpp + * VERSION see + * DESCRIPTION: Provides u32regex_token_iterator implementation. + */ + +#ifndef BOOST_REGEX_V4_U32REGEX_TOKEN_ITERATOR_HPP +#define BOOST_REGEX_V4_U32REGEX_TOKEN_ITERATOR_HPP + +#if (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\ + || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \ + || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) +// +// Borland C++ Builder 6, and Visual C++ 6, +// can't cope with the array template constructor +// so we have a template member that will accept any type as +// argument, and then assert that is really is an array: +// +#include +#include +#endif + +namespace cutl_details_boost{ + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#if BOOST_WORKAROUND(BOOST_MSVC, > 1300) +# pragma warning(push) +# pragma warning(disable:4700) +#endif + +template +class u32regex_token_iterator_implementation +{ + typedef u32regex regex_type; + typedef sub_match value_type; + + match_results what; // current match + BidirectionalIterator end; // end of search area + BidirectionalIterator base; // start of search area + const regex_type re; // the expression + match_flag_type flags; // match flags + value_type result; // the current string result + int N; // the current sub-expression being enumerated + std::vector subs; // the sub-expressions to enumerate + +public: + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, int sub, match_flag_type f) + : end(last), re(*p), flags(f){ subs.push_back(sub); } + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const std::vector& v, match_flag_type f) + : end(last), re(*p), flags(f), subs(v){} +#if BOOST_WORKAROUND(BOOST_MSVC, < 1300) + // can't reliably get this to work.... +#elif (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\ + || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \ + || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \ + || BOOST_WORKAROUND(__HP_aCC, < 60700) + template + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const T& submatches, match_flag_type f) + : end(last), re(*p), flags(f) + { + // assert that T really is an array: + BOOST_STATIC_ASSERT(::cutl_details_boost::is_array::value); + const std::size_t array_size = sizeof(T) / sizeof(submatches[0]); + for(std::size_t i = 0; i < array_size; ++i) + { + subs.push_back(submatches[i]); + } + } +#else + template + u32regex_token_iterator_implementation(const regex_type* p, BidirectionalIterator last, const int (&submatches)[CN], match_flag_type f) + : end(last), re(*p), flags(f) + { + for(std::size_t i = 0; i < CN; ++i) + { + subs.push_back(submatches[i]); + } + } +#endif + + bool init(BidirectionalIterator first) + { + base = first; + N = 0; + if(u32regex_search(first, end, what, re, flags, base) == true) + { + N = 0; + result = ((subs[N] == -1) ? what.prefix() : what[(int)subs[N]]); + return true; + } + else if((subs[N] == -1) && (first != end)) + { + result.first = first; + result.second = end; + result.matched = (first != end); + N = -1; + return true; + } + return false; + } + bool compare(const u32regex_token_iterator_implementation& that) + { + if(this == &that) return true; + return (&re.get_data() == &that.re.get_data()) + && (end == that.end) + && (flags == that.flags) + && (N == that.N) + && (what[0].first == that.what[0].first) + && (what[0].second == that.what[0].second); + } + const value_type& get() + { return result; } + bool next() + { + if(N == -1) + return false; + if(N+1 < (int)subs.size()) + { + ++N; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + //if(what.prefix().first != what[0].second) + // flags |= match_prev_avail | regex_constants::match_not_bob; + BidirectionalIterator last_end(what[0].second); + if(u32regex_search(last_end, end, what, re, ((what[0].first == what[0].second) ? flags | regex_constants::match_not_initial_null : flags), base)) + { + N =0; + result =((subs[N] == -1) ? what.prefix() : what[subs[N]]); + return true; + } + else if((last_end != end) && (subs[0] == -1)) + { + N =-1; + result.first = last_end; + result.second = end; + result.matched = (last_end != end); + return true; + } + return false; + } +private: + u32regex_token_iterator_implementation& operator=(const u32regex_token_iterator_implementation&); +}; + +template +class u32regex_token_iterator +#ifndef BOOST_NO_STD_ITERATOR + : public std::iterator< + std::forward_iterator_tag, + sub_match, + typename re_detail::regex_iterator_traits::difference_type, + const sub_match*, + const sub_match& > +#endif +{ +private: + typedef u32regex_token_iterator_implementation impl; + typedef shared_ptr pimpl; +public: + typedef u32regex regex_type; + typedef sub_match value_type; + typedef typename re_detail::regex_iterator_traits::difference_type + difference_type; + typedef const value_type* pointer; + typedef const value_type& reference; + typedef std::forward_iterator_tag iterator_category; + + u32regex_token_iterator(){} + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + int submatch = 0, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatch, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const std::vector& submatches, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } +#if BOOST_WORKAROUND(BOOST_MSVC, < 1300) + // can't reliably get this to work.... +#elif (BOOST_WORKAROUND(__BORLANDC__, >= 0x560) && BOOST_WORKAROUND(__BORLANDC__, BOOST_TESTED_AT(0x570)))\ + || BOOST_WORKAROUND(BOOST_MSVC, < 1300) \ + || BOOST_WORKAROUND(__MWERKS__, BOOST_TESTED_AT(0x3003)) \ + || BOOST_WORKAROUND(__HP_aCC, < 60700) + template + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const T& submatches, match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } +#else + template + u32regex_token_iterator(BidirectionalIterator a, BidirectionalIterator b, const regex_type& re, + const int (&submatches)[N], match_flag_type m = match_default) + : pdata(new impl(&re, b, submatches, m)) + { + if(!pdata->init(a)) + pdata.reset(); + } +#endif + u32regex_token_iterator(const u32regex_token_iterator& that) + : pdata(that.pdata) {} + u32regex_token_iterator& operator=(const u32regex_token_iterator& that) + { + pdata = that.pdata; + return *this; + } + bool operator==(const u32regex_token_iterator& that)const + { + if((pdata.get() == 0) || (that.pdata.get() == 0)) + return pdata.get() == that.pdata.get(); + return pdata->compare(*(that.pdata.get())); + } + bool operator!=(const u32regex_token_iterator& that)const + { return !(*this == that); } + const value_type& operator*()const + { return pdata->get(); } + const value_type* operator->()const + { return &(pdata->get()); } + u32regex_token_iterator& operator++() + { + cow(); + if(0 == pdata->next()) + { + pdata.reset(); + } + return *this; + } + u32regex_token_iterator operator++(int) + { + u32regex_token_iterator result(*this); + ++(*this); + return result; + } +private: + + pimpl pdata; + + void cow() + { + // copy-on-write + if(pdata.get() && !pdata.unique()) + { + pdata.reset(new impl(*(pdata.get()))); + } + } +}; + +typedef u32regex_token_iterator utf8regex_token_iterator; +typedef u32regex_token_iterator utf16regex_token_iterator; +typedef u32regex_token_iterator utf32regex_token_iterator; + +// construction from an integral sub_match state_id: +inline u32regex_token_iterator make_u32regex_token_iterator(const char* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_token_iterator make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +inline u32regex_token_iterator make_u32regex_token_iterator(const UChar* p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+u_strlen(p), e, submatch, m); +} +#endif +template +inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_token_iterator(p.begin(), p.end(), e, submatch, m); +} +inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, int submatch = 0, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} + +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1300) +// construction from a reference to an array: +template +inline u32regex_token_iterator make_u32regex_token_iterator(const char* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +template +inline u32regex_token_iterator make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +template +inline u32regex_token_iterator make_u32regex_token_iterator(const UChar* p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+u_strlen(p), e, m); +} +#endif +template +inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_token_iterator(p.begin(), p.end(), e, m); +} +template +inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const int (&submatch)[N], regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} +#endif // BOOST_MSVC < 1300 + +// construction from a vector of sub_match state_id's: +inline u32regex_token_iterator make_u32regex_token_iterator(const char* p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::strlen(p), e, submatch, m); +} +#ifndef BOOST_NO_WREGEX +inline u32regex_token_iterator make_u32regex_token_iterator(const wchar_t* p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+std::wcslen(p), e, submatch, m); +} +#endif +#if !defined(U_WCHAR_IS_UTF16) && (U_SIZEOF_WCHAR_T != 2) +inline u32regex_token_iterator make_u32regex_token_iterator(const UChar* p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(p, p+u_strlen(p), e, submatch, m); +} +#endif +template +inline u32regex_token_iterator::const_iterator> make_u32regex_token_iterator(const std::basic_string& p, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + typedef typename std::basic_string::const_iterator iter_type; + return u32regex_token_iterator(p.begin(), p.end(), e, m); +} +inline u32regex_token_iterator make_u32regex_token_iterator(const U_NAMESPACE_QUALIFIER UnicodeString& s, const u32regex& e, const std::vector& submatch, regex_constants::match_flag_type m = regex_constants::match_default) +{ + return u32regex_token_iterator(s.getBuffer(), s.getBuffer() + s.length(), e, submatch, m); +} + +#if BOOST_WORKAROUND(BOOST_MSVC, > 1300) +# pragma warning(pop) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + +} // namespace cutl_details_boost + +#endif // BOOST_REGEX_V4_REGEX_TOKEN_ITERATOR_HPP + + + + diff --git a/cutl/details/boost/regex/v4/w32_regex_traits.hpp b/cutl/details/boost/regex/v4/w32_regex_traits.hpp new file mode 100644 index 0000000..454c915 --- /dev/null +++ b/cutl/details/boost/regex/v4/w32_regex_traits.hpp @@ -0,0 +1,741 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE w32_regex_traits.hpp + * VERSION see + * DESCRIPTION: Declares regular expression traits class w32_regex_traits. + */ + +#ifndef BOOST_W32_REGEX_TRAITS_HPP_INCLUDED +#define BOOST_W32_REGEX_TRAITS_HPP_INCLUDED + +#ifndef BOOST_RE_PAT_EXCEPT_HPP +#include +#endif +#ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED +#include +#endif +#ifdef BOOST_HAS_THREADS +#include +#endif +#ifndef BOOST_REGEX_PRIMARY_TRANSFORM +#include +#endif +#ifndef BOOST_REGEX_OBJECT_CACHE_HPP +#include +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4786) +#pragma warning(disable:4800) +#endif + +namespace cutl_details_boost{ + +// +// forward declaration is needed by some compilers: +// +template +class w32_regex_traits; + +namespace re_detail{ + +// +// start by typedeffing the types we'll need: +// +typedef ::cutl_details_boost::uint32_t lcid_type; // placeholder for LCID. +typedef ::cutl_details_boost::shared_ptr cat_type; // placeholder for dll HANDLE. + +// +// then add wrappers around the actual Win32 API's (ie implementation hiding): +// +BOOST_REGEX_DECL lcid_type BOOST_REGEX_CALL w32_get_default_locale(); +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(char, lcid_type); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(wchar_t, lcid_type); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(unsigned short ca, lcid_type state_id); +#endif +#endif +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(char, lcid_type); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(wchar_t, lcid_type); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(unsigned short ca, lcid_type state_id); +#endif +#endif +BOOST_REGEX_DECL cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name); +BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::string& def); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type state_id, int i, const std::wstring& def); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL std::basic_string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::basic_string& def); +#endif +#endif +BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_transform(lcid_type state_id, const char* p1, const char* p2); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_transform(lcid_type state_id, const wchar_t* p1, const wchar_t* p2); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL std::basic_string BOOST_REGEX_CALL w32_transform(lcid_type state_id, const unsigned short* p1, const unsigned short* p2); +#endif +#endif +BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_tolower(char c, lcid_type); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_tolower(wchar_t c, lcid_type); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_tolower(unsigned short c, lcid_type state_id); +#endif +#endif +BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_toupper(char c, lcid_type); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type); +#endif +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type, cutl_details_boost::uint32_t mask, char c); +#ifndef BOOST_NO_WREGEX +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type, cutl_details_boost::uint32_t mask, wchar_t c); +#ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T +BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type state_id, cutl_details_boost::uint32_t m, unsigned short c); +#endif +#endif +// +// class w32_regex_traits_base: +// acts as a container for locale and the facets we are using. +// +template +struct w32_regex_traits_base +{ + w32_regex_traits_base(lcid_type l) + { imbue(l); } + lcid_type imbue(lcid_type l); + + lcid_type m_locale; +}; + +template +inline lcid_type w32_regex_traits_base::imbue(lcid_type l) +{ + lcid_type result(m_locale); + m_locale = l; + return result; +} + +// +// class w32_regex_traits_char_layer: +// implements methods that require specialisation for narrow characters: +// +template +class w32_regex_traits_char_layer : public w32_regex_traits_base +{ + typedef std::basic_string string_type; + typedef std::map map_type; + typedef typename map_type::const_iterator map_iterator_type; +public: + w32_regex_traits_char_layer(const lcid_type l); + + regex_constants::syntax_type syntax_type(charT c)const + { + map_iterator_type i = m_char_map.find(c); + return ((i == m_char_map.end()) ? 0 : i->second); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + map_iterator_type i = m_char_map.find(c); + if(i == m_char_map.end()) + { + if(::cutl_details_boost::re_detail::w32_is_lower(c, this->m_locale)) return regex_constants::escape_type_class; + if(::cutl_details_boost::re_detail::w32_is_upper(c, this->m_locale)) return regex_constants::escape_type_not_class; + return 0; + } + return i->second; + } + charT tolower(charT c)const + { + return ::cutl_details_boost::re_detail::w32_tolower(c, this->m_locale); + } + bool isctype(cutl_details_boost::uint32_t mask, charT c)const + { + return ::cutl_details_boost::re_detail::w32_is(this->m_locale, mask, c); + } + +private: + string_type get_default_message(regex_constants::syntax_type); + // TODO: use a hash table when available! + map_type m_char_map; +}; + +template +w32_regex_traits_char_layer::w32_regex_traits_char_layer(::cutl_details_boost::re_detail::lcid_type l) + : w32_regex_traits_base(l) +{ + // we need to start by initialising our syntax map so we know which + // character is used for which purpose: + cat_type cat; + std::string cat_name(w32_regex_traits::get_catalog_name()); + if(cat_name.size()) + { + cat = ::cutl_details_boost::re_detail::w32_cat_open(cat_name); + if(!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + cutl_details_boost::re_detail::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if(cat) + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + string_type mss = ::cutl_details_boost::re_detail::w32_cat_get(cat, this->m_locale, i, get_default_message(i)); + for(typename string_type::size_type j = 0; j < mss.size(); ++j) + { + this->m_char_map[mss[j]] = i; + } + } + } + else + { + for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i) + { + const char* ptr = get_default_syntax(i); + while(ptr && *ptr) + { + this->m_char_map[static_cast(*ptr)] = i; + ++ptr; + } + } + } +} + +template +typename w32_regex_traits_char_layer::string_type + w32_regex_traits_char_layer::get_default_message(regex_constants::syntax_type i) +{ + const char* ptr = get_default_syntax(i); + string_type result; + while(ptr && *ptr) + { + result.append(1, static_cast(*ptr)); + ++ptr; + } + return result; +} + +// +// specialised version for narrow characters: +// +template <> +class BOOST_REGEX_DECL w32_regex_traits_char_layer : public w32_regex_traits_base +{ + typedef std::string string_type; +public: + w32_regex_traits_char_layer(::cutl_details_boost::re_detail::lcid_type l) + : w32_regex_traits_base(l) + { + init(); + } + + regex_constants::syntax_type syntax_type(char c)const + { + return m_char_map[static_cast(c)]; + } + regex_constants::escape_syntax_type escape_syntax_type(char c) const + { + return m_char_map[static_cast(c)]; + } + char tolower(char c)const + { + return m_lower_map[static_cast(c)]; + } + bool isctype(cutl_details_boost::uint32_t mask, char c)const + { + return m_type_map[static_cast(c)] & mask; + } + +private: + regex_constants::syntax_type m_char_map[1u << CHAR_BIT]; + char m_lower_map[1u << CHAR_BIT]; + cutl_details_boost::uint16_t m_type_map[1u << CHAR_BIT]; + void init(); +}; + +// +// class w32_regex_traits_implementation: +// provides pimpl implementation for w32_regex_traits. +// +template +class w32_regex_traits_implementation : public w32_regex_traits_char_layer +{ +public: + typedef typename w32_regex_traits::char_class_type char_class_type; + BOOST_STATIC_CONSTANT(char_class_type, mask_word = 0x0400); // must be C1_DEFINED << 1 + BOOST_STATIC_CONSTANT(char_class_type, mask_unicode = 0x0800); // must be C1_DEFINED << 2 + BOOST_STATIC_CONSTANT(char_class_type, mask_horizontal = 0x1000); // must be C1_DEFINED << 3 + BOOST_STATIC_CONSTANT(char_class_type, mask_vertical = 0x2000); // must be C1_DEFINED << 4 + BOOST_STATIC_CONSTANT(char_class_type, mask_base = 0x3ff); // all the masks used by the CT_CTYPE1 group + + typedef std::basic_string string_type; + typedef charT char_type; + w32_regex_traits_implementation(::cutl_details_boost::re_detail::lcid_type l); + std::string error_string(regex_constants::error_type n) const + { + if(!m_error_strings.empty()) + { + std::map::const_iterator p = m_error_strings.find(n); + return (p == m_error_strings.end()) ? std::string(get_default_error_string(n)) : p->second; + } + return get_default_error_string(n); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + char_class_type result = lookup_classname_imp(p1, p2); + if(result == 0) + { + typedef typename string_type::size_type size_type; + string_type temp(p1, p2); + for(size_type i = 0; i < temp.size(); ++i) + temp[i] = this->tolower(temp[i]); + result = lookup_classname_imp(&*temp.begin(), &*temp.begin() + temp.size()); + } + return result; + } + string_type lookup_collatename(const charT* p1, const charT* p2) const; + string_type transform_primary(const charT* p1, const charT* p2) const; + string_type transform(const charT* p1, const charT* p2) const + { + return ::cutl_details_boost::re_detail::w32_transform(this->m_locale, p1, p2); + } +private: + std::map m_error_strings; // error messages indexed by numberic ID + std::map m_custom_class_names; // character class names + std::map m_custom_collate_names; // collating element names + unsigned m_collate_type; // the form of the collation string + charT m_collate_delim; // the collation group delimiter + // + // helpers: + // + char_class_type lookup_classname_imp(const charT* p1, const charT* p2) const; +}; + +template +typename w32_regex_traits_implementation::string_type + w32_regex_traits_implementation::transform_primary(const charT* p1, const charT* p2) const +{ + string_type result; + // + // What we do here depends upon the format of the sort key returned by + // sort key returned by this->transform: + // + switch(m_collate_type) + { + case sort_C: + case sort_unknown: + // the best we can do is translate to lower case, then get a regular sort key: + { + result.assign(p1, p2); + typedef typename string_type::size_type size_type; + for(size_type i = 0; i < result.size(); ++i) + result[i] = this->tolower(result[i]); + result = this->transform(&*result.begin(), &*result.begin() + result.size()); + break; + } + case sort_fixed: + { + // get a regular sort key, and then truncate it: + result.assign(this->transform(p1, p2)); + result.erase(this->m_collate_delim); + break; + } + case sort_delim: + // get a regular sort key, and then truncate everything after the delim: + result.assign(this->transform(p1, p2)); + std::size_t i; + for(i = 0; i < result.size(); ++i) + { + if(result[i] == m_collate_delim) + break; + } + result.erase(i); + break; + } + if(result.empty()) + result = string_type(1, charT(0)); + return result; +} + +template +typename w32_regex_traits_implementation::string_type + w32_regex_traits_implementation::lookup_collatename(const charT* p1, const charT* p2) const +{ + typedef typename std::map::const_iterator iter_type; + if(m_custom_collate_names.size()) + { + iter_type pos = m_custom_collate_names.find(string_type(p1, p2)); + if(pos != m_custom_collate_names.end()) + return pos->second; + } +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) + std::string name(p1, p2); +#else + std::string name; + const charT* p0 = p1; + while(p0 != p2) + name.append(1, char(*p0++)); +#endif + name = lookup_default_collate_name(name); +#if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\ + && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\ + && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551) + if(name.size()) + return string_type(name.begin(), name.end()); +#else + if(name.size()) + { + string_type result; + typedef std::string::const_iterator iter; + iter b = name.begin(); + iter e = name.end(); + while(b != e) + result.append(1, charT(*b++)); + return result; + } +#endif + if(p2 - p1 == 1) + return string_type(1, *p1); + return string_type(); +} + +template +w32_regex_traits_implementation::w32_regex_traits_implementation(::cutl_details_boost::re_detail::lcid_type l) +: w32_regex_traits_char_layer(l) +{ + cat_type cat; + std::string cat_name(w32_regex_traits::get_catalog_name()); + if(cat_name.size()) + { + cat = ::cutl_details_boost::re_detail::w32_cat_open(cat_name); + if(!cat) + { + std::string m("Unable to open message catalog: "); + std::runtime_error err(m + cat_name); + cutl_details_boost::re_detail::raise_runtime_error(err); + } + } + // + // if we have a valid catalog then load our messages: + // + if(cat) + { + // + // Error messages: + // + for(cutl_details_boost::regex_constants::error_type i = static_cast(0); + i <= cutl_details_boost::regex_constants::error_unknown; + i = static_cast(i + 1)) + { + const char* p = get_default_error_string(i); + string_type default_message; + while(*p) + { + default_message.append(1, static_cast(*p)); + ++p; + } + string_type s = ::cutl_details_boost::re_detail::w32_cat_get(cat, this->m_locale, i+200, default_message); + std::string result; + for(std::string::size_type j = 0; j < s.size(); ++j) + { + result.append(1, static_cast(s[j])); + } + m_error_strings[i] = result; + } + // + // Custom class names: + // + static const char_class_type masks[14] = + { + 0x0104u, // C1_ALPHA | C1_DIGIT + 0x0100u, // C1_ALPHA + 0x0020u, // C1_CNTRL + 0x0004u, // C1_DIGIT + (~(0x0020u|0x0008u) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE + 0x0002u, // C1_LOWER + (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL + 0x0010u, // C1_PUNCT + 0x0008u, // C1_SPACE + 0x0001u, // C1_UPPER + 0x0080u, // C1_XDIGIT + 0x0040u, // C1_BLANK + w32_regex_traits_implementation::mask_word, + w32_regex_traits_implementation::mask_unicode, + }; + static const string_type null_string; + for(unsigned int j = 0; j <= 13; ++j) + { + string_type s(::cutl_details_boost::re_detail::w32_cat_get(cat, this->m_locale, j+300, null_string)); + if(s.size()) + this->m_custom_class_names[s] = masks[j]; + } + } + // + // get the collation format used by m_pcollate: + // + m_collate_type = re_detail::find_sort_syntax(this, &m_collate_delim); +} + +template +typename w32_regex_traits_implementation::char_class_type + w32_regex_traits_implementation::lookup_classname_imp(const charT* p1, const charT* p2) const +{ + static const char_class_type masks[22] = + { + 0, + 0x0104u, // C1_ALPHA | C1_DIGIT + 0x0100u, // C1_ALPHA + 0x0040u, // C1_BLANK + 0x0020u, // C1_CNTRL + 0x0004u, // C1_DIGIT + 0x0004u, // C1_DIGIT + (~(0x0020u|0x0008u|0x0040) & 0x01ffu) | 0x0400u, // not C1_CNTRL or C1_SPACE or C1_BLANK + w32_regex_traits_implementation::mask_horizontal, + 0x0002u, // C1_LOWER + 0x0002u, // C1_LOWER + (~0x0020u & 0x01ffu) | 0x0400, // not C1_CNTRL + 0x0010u, // C1_PUNCT + 0x0008u, // C1_SPACE + 0x0008u, // C1_SPACE + 0x0001u, // C1_UPPER + w32_regex_traits_implementation::mask_unicode, + 0x0001u, // C1_UPPER + w32_regex_traits_implementation::mask_vertical, + 0x0104u | w32_regex_traits_implementation::mask_word, + 0x0104u | w32_regex_traits_implementation::mask_word, + 0x0080u, // C1_XDIGIT + }; + if(m_custom_class_names.size()) + { + typedef typename std::map, char_class_type>::const_iterator map_iter; + map_iter pos = m_custom_class_names.find(string_type(p1, p2)); + if(pos != m_custom_class_names.end()) + return pos->second; + } + std::size_t state_id = 1 + re_detail::get_default_class_id(p1, p2); + if(state_id < sizeof(masks) / sizeof(masks[0])) + return masks[state_id]; + return masks[0]; +} + + +template +cutl_details_boost::shared_ptr > create_w32_regex_traits(::cutl_details_boost::re_detail::lcid_type l BOOST_APPEND_EXPLICIT_TEMPLATE_TYPE(charT)) +{ + // TODO: create a cache for previously constructed objects. + return cutl_details_boost::object_cache< ::cutl_details_boost::re_detail::lcid_type, w32_regex_traits_implementation >::get(l, 5); +} + +} // re_detail + +template +class w32_regex_traits +{ +public: + typedef charT char_type; + typedef std::size_t size_type; + typedef std::basic_string string_type; + typedef ::cutl_details_boost::re_detail::lcid_type locale_type; + typedef cutl_details_boost::uint_least32_t char_class_type; + + struct boost_extensions_tag{}; + + w32_regex_traits() + : m_pimpl(re_detail::create_w32_regex_traits(::cutl_details_boost::re_detail::w32_get_default_locale())) + { } + static size_type length(const char_type* p) + { + return std::char_traits::length(p); + } + regex_constants::syntax_type syntax_type(charT c)const + { + return m_pimpl->syntax_type(c); + } + regex_constants::escape_syntax_type escape_syntax_type(charT c) const + { + return m_pimpl->escape_syntax_type(c); + } + charT translate(charT c) const + { + return c; + } + charT translate_nocase(charT c) const + { + return this->m_pimpl->tolower(c); + } + charT translate(charT c, bool icase) const + { + return icase ? this->m_pimpl->tolower(c) : c; + } + charT tolower(charT c) const + { + return this->m_pimpl->tolower(c); + } + charT toupper(charT c) const + { + return ::cutl_details_boost::re_detail::w32_toupper(c, this->m_pimpl->m_locale); + } + string_type transform(const charT* p1, const charT* p2) const + { + return ::cutl_details_boost::re_detail::w32_transform(this->m_pimpl->m_locale, p1, p2); + } + string_type transform_primary(const charT* p1, const charT* p2) const + { + return m_pimpl->transform_primary(p1, p2); + } + char_class_type lookup_classname(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_classname(p1, p2); + } + string_type lookup_collatename(const charT* p1, const charT* p2) const + { + return m_pimpl->lookup_collatename(p1, p2); + } + bool isctype(charT c, char_class_type f) const + { + if((f & re_detail::w32_regex_traits_implementation::mask_base) + && (this->m_pimpl->isctype(f & re_detail::w32_regex_traits_implementation::mask_base, c))) + return true; + else if((f & re_detail::w32_regex_traits_implementation::mask_unicode) && re_detail::is_extended(c)) + return true; + else if((f & re_detail::w32_regex_traits_implementation::mask_word) && (c == '_')) + return true; + else if((f & re_detail::w32_regex_traits_implementation::mask_vertical) + && (::cutl_details_boost::re_detail::is_separator(c) || (c == '\v'))) + return true; + else if((f & re_detail::w32_regex_traits_implementation::mask_horizontal) + && this->isctype(c, 0x0008u) && !this->isctype(c, re_detail::w32_regex_traits_implementation::mask_vertical)) + return true; + return false; + } + int toi(const charT*& p1, const charT* p2, int radix)const + { + return ::cutl_details_boost::re_detail::global_toi(p1, p2, radix, *this); + } + int value(charT c, int radix)const + { + int result = ::cutl_details_boost::re_detail::global_value(c); + return result < radix ? result : -1; + } + locale_type imbue(locale_type l) + { + ::cutl_details_boost::re_detail::lcid_type result(getloc()); + m_pimpl = re_detail::create_w32_regex_traits(l); + return result; + } + locale_type getloc()const + { + return m_pimpl->m_locale; + } + std::string error_string(regex_constants::error_type n) const + { + return m_pimpl->error_string(n); + } + + // + // extension: + // set the name of the message catalog in use (defaults to "boost_regex"). + // + static std::string catalog_name(const std::string& name); + static std::string get_catalog_name(); + +private: + cutl_details_boost::shared_ptr > m_pimpl; + // + // catalog name handler: + // + static std::string& get_catalog_name_inst(); + +#ifdef BOOST_HAS_THREADS + static static_mutex& get_mutex_inst(); +#endif +}; + +template +std::string w32_regex_traits::catalog_name(const std::string& name) +{ +#ifdef BOOST_HAS_THREADS + static_mutex::scoped_lock lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + get_catalog_name_inst() = name; + return result; +} + +template +std::string& w32_regex_traits::get_catalog_name_inst() +{ + static std::string s_name; + return s_name; +} + +template +std::string w32_regex_traits::get_catalog_name() +{ +#ifdef BOOST_HAS_THREADS + static_mutex::scoped_lock lk(get_mutex_inst()); +#endif + std::string result(get_catalog_name_inst()); + return result; +} + +#ifdef BOOST_HAS_THREADS +template +static_mutex& w32_regex_traits::get_mutex_inst() +{ + static static_mutex s_mutex = BOOST_STATIC_MUTEX_INIT; + return s_mutex; +} +#endif + + +} // boost + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif -- cgit v1.1