From b51965dddbed68f23c5e8c169c23c794313ce5f6 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 28 Jun 2011 17:17:23 +0200 Subject: Add boost subset as an implementation detail --- cutl/details/boost/regex/v4/perl_matcher.hpp | 584 +++++++++++++++++++++++++++ 1 file changed, 584 insertions(+) create mode 100644 cutl/details/boost/regex/v4/perl_matcher.hpp (limited to 'cutl/details/boost/regex/v4/perl_matcher.hpp') diff --git a/cutl/details/boost/regex/v4/perl_matcher.hpp b/cutl/details/boost/regex/v4/perl_matcher.hpp new file mode 100644 index 0000000..8da8bed --- /dev/null +++ b/cutl/details/boost/regex/v4/perl_matcher.hpp @@ -0,0 +1,584 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + +#ifndef BOOST_REGEX_MATCHER_HPP +#define BOOST_REGEX_MATCHER_HPP + +#include + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +// +// error checking API: +// +BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(cutl_details_boost::regex_constants::syntax_option_type ef, match_flag_type mf); +// +// function can_start: +// +template +inline bool can_start(charT c, const unsigned char* map, unsigned char mask) +{ + return ((c < static_cast(0)) ? true : ((c >= static_cast(1 << CHAR_BIT)) ? true : map[c] & mask)); +} +inline bool can_start(char c, const unsigned char* map, unsigned char mask) +{ + return map[(unsigned char)c] & mask; +} +inline bool can_start(signed char c, const unsigned char* map, unsigned char mask) +{ + return map[(unsigned char)c] & mask; +} +inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask) +{ + return map[c] & mask; +} +inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask) +{ + return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask); +} +#if !defined(__hpux) && !defined(__WINSCW__)// WCHAR_MIN not usable in pp-directives. +#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T) +inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask) +{ + return ((c >= static_cast(1u << CHAR_BIT)) ? true : map[c] & mask); +} +#endif +#endif +#if !defined(BOOST_NO_INTRINSIC_WCHAR_T) +inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask) +{ + return (((c >= static_cast(1u << CHAR_BIT)) ? true : map[c] & mask)); +} +#endif + + +// +// Unfortunately Rogue Waves standard library appears to have a bug +// in std::basic_string::compare that results in eroneous answers +// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version +// 0x020101) the test case was: +// {39135,0} < {0xff,0} +// which succeeds when it should not. +// +#ifndef _RWSTD_VER +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310) +template +inline int string_compare(const std::basic_string& s, const C* p) +{ + if(0 == *p) + { + if(s.empty() || ((s.size() == 1) && (s[0] == 0))) + return 0; + } + return s.compare(p); +} +#endif +#else +#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310) +template +inline int string_compare(const std::basic_string& s, const C* p) +{ + if(0 == *p) + { + if(s.empty() || ((s.size() == 1) && (s[0] == 0))) + return 0; + } + return s.compare(p); +} +#endif +inline int string_compare(const std::string& s, const char* p) +{ return std::strcmp(s.c_str(), p); } +# ifndef BOOST_NO_WREGEX +inline int string_compare(const std::wstring& s, const wchar_t* p) +{ return std::wcscmp(s.c_str(), p); } +#endif +#endif +template +inline int string_compare(const Seq& s, const C* p) +{ + std::size_t i = 0; + while((i < s.size()) && (p[i] == s[i])) + { + ++i; + } + return (i == s.size()) ? -p[i] : s[i] - p[i]; +} +# define STR_COMP(s,p) string_compare(s,p) + +template +inline const charT* re_skip_past_null(const charT* p) +{ + while (*p != static_cast(0)) ++p; + return ++p; +} + +template +iterator BOOST_REGEX_CALL re_is_set_member(iterator next, + iterator last, + const re_set_long* set_, + const regex_data& e, bool icase) +{ + const charT* p = reinterpret_cast(set_+1); + iterator ptr; + unsigned int i; + //bool icase = e.m_flags & regex_constants::icase; + + if(next == last) return next; + + typedef typename traits_type::string_type traits_string_type; + const ::cutl_details_boost::regex_traits_wrapper& traits_inst = *(e.m_ptraits); + + // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never + // referenced + (void)traits_inst; + + // try and match a single character, could be a multi-character + // collating element... + for(i = 0; i < set_->csingles; ++i) + { + ptr = next; + if(*p == static_cast(0)) + { + // treat null string as special case: + if(traits_inst.translate(*ptr, icase) != *p) + { + while(*p == static_cast(0))++p; + continue; + } + return set_->isnot ? next : (ptr == next) ? ++next : ptr; + } + else + { + while(*p && (ptr != last)) + { + if(traits_inst.translate(*ptr, icase) != *p) + break; + ++p; + ++ptr; + } + + if(*p == static_cast(0)) // if null we've matched + return set_->isnot ? next : (ptr == next) ? ++next : ptr; + + p = re_skip_past_null(p); // skip null + } + } + + charT col = traits_inst.translate(*next, icase); + + + if(set_->cranges || set_->cequivalents) + { + traits_string_type s1; + // + // try and match a range, NB only a single character can match + if(set_->cranges) + { + if((e.m_flags & regex_constants::collate) == 0) + s1.assign(1, col); + else + { + charT a[2] = { col, charT(0), }; + s1 = traits_inst.transform(a, a + 1); + } + for(i = 0; i < set_->cranges; ++i) + { + if(STR_COMP(s1, p) >= 0) + { + do{ ++p; }while(*p); + ++p; + if(STR_COMP(s1, p) <= 0) + return set_->isnot ? next : ++next; + } + else + { + // skip first string + do{ ++p; }while(*p); + ++p; + } + // skip second string + do{ ++p; }while(*p); + ++p; + } + } + // + // try and match an equivalence class, NB only a single character can match + if(set_->cequivalents) + { + charT a[2] = { col, charT(0), }; + s1 = traits_inst.transform_primary(a, a +1); + for(i = 0; i < set_->cequivalents; ++i) + { + if(STR_COMP(s1, p) == 0) + return set_->isnot ? next : ++next; + // skip string + do{ ++p; }while(*p); + ++p; + } + } + } + if(traits_inst.isctype(col, set_->cclasses) == true) + return set_->isnot ? next : ++next; + if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false)) + return set_->isnot ? next : ++next; + return set_->isnot ? ++next : next; +} + +template +class repeater_count +{ + repeater_count** stack; + repeater_count* next; + int state_id; + std::size_t count; // the number of iterations so far + BidiIterator start_pos; // where the last repeat started +public: + repeater_count(repeater_count** s) + { + stack = s; + next = 0; + state_id = -1; + count = 0; + } + repeater_count(int i, repeater_count** s, BidiIterator start) + : start_pos(start) + { + state_id = i; + stack = s; + next = *stack; + *stack = this; + if(state_id > next->state_id) + count = 0; + else + { + repeater_count* p = next; + while(p && (p->state_id != state_id)) + p = p->next; + if(p) + { + count = p->count; + start_pos = p->start_pos; + } + else + count = 0; + } + } + ~repeater_count() + { + if(next) + *stack = next; + } + std::size_t get_count() { return count; } + int get_id() { return state_id; } + std::size_t operator++() { return ++count; } + bool check_null_repeat(const BidiIterator& pos, std::size_t max) + { + // this is called when we are about to start a new repeat, + // if the last one was NULL move our count to max, + // otherwise save the current position. + bool result = (count == 0) ? false : (pos == start_pos); + if(result) + count = max; + else + start_pos = pos; + return result; + } +}; + +struct saved_state; + +enum saved_state_type +{ + saved_type_end = 0, + saved_type_paren = 1, + saved_type_recurse = 2, + saved_type_assertion = 3, + saved_state_alt = 4, + saved_state_repeater_count = 5, + saved_state_extra_block = 6, + saved_state_greedy_single_repeat = 7, + saved_state_rep_slow_dot = 8, + saved_state_rep_fast_dot = 9, + saved_state_rep_char = 10, + saved_state_rep_short_set = 11, + saved_state_rep_long_set = 12, + saved_state_non_greedy_long_repeat = 13, + saved_state_count = 14 +}; + +template +struct recursion_info +{ + typedef typename Results::value_type value_type; + typedef typename value_type::iterator iterator; + int idx; + const re_syntax_base* preturn_address; + Results results; + repeater_count* repeater_stack; +}; + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable : 4251 4231 4660) +#endif + +template +class perl_matcher +{ +public: + typedef typename traits::char_type char_type; + typedef perl_matcher self_type; + typedef bool (self_type::*matcher_proc_type)(void); + typedef std::size_t traits_size_type; + typedef typename is_byte::width_type width_type; + typedef typename regex_iterator_traits::difference_type difference_type; + typedef match_results results_type; + + perl_matcher(BidiIterator first, BidiIterator end, + match_results& what, + const basic_regex& e, + match_flag_type f, + BidiIterator l_base) + : m_result(what), base(first), last(end), + position(first), backstop(l_base), re(e), traits_inst(e.get_traits()), + m_independent(false), next_count(&rep_obj), rep_obj(&next_count) + { + construct_init(e, f); + } + + bool match(); + bool find(); + + void setf(match_flag_type f) + { m_match_flags |= f; } + void unsetf(match_flag_type f) + { m_match_flags &= ~f; } + +private: + void construct_init(const basic_regex& e, match_flag_type f); + + bool find_imp(); + bool match_imp(); +#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD + typedef bool (perl_matcher::*protected_proc_type)(); + bool protected_call(protected_proc_type); +#endif + void estimate_max_state_count(std::random_access_iterator_tag*); + void estimate_max_state_count(void*); + bool match_prefix(); + bool match_all_states(); + + // match procs, stored in s_match_vtable: + bool match_startmark(); + bool match_endmark(); + bool match_literal(); + bool match_start_line(); + bool match_end_line(); + bool match_wild(); + bool match_match(); + bool match_word_boundary(); + bool match_within_word(); + bool match_word_start(); + bool match_word_end(); + bool match_buffer_start(); + bool match_buffer_end(); + bool match_backref(); + bool match_long_set(); + bool match_set(); + bool match_jump(); + bool match_alt(); + bool match_rep(); + bool match_combining(); + bool match_soft_buffer_end(); + bool match_restart_continue(); + bool match_long_set_repeat(); + bool match_set_repeat(); + bool match_char_repeat(); + bool match_dot_repeat_fast(); + bool match_dot_repeat_slow(); + bool match_dot_repeat_dispatch() + { + return ::cutl_details_boost::is_random_access_iterator::value ? match_dot_repeat_fast() : match_dot_repeat_slow(); + } + bool match_backstep(); + bool match_assert_backref(); + bool match_toggle_case(); +#ifdef BOOST_REGEX_RECURSIVE + bool backtrack_till_match(std::size_t count); +#endif + bool match_recursion(); + + // find procs stored in s_find_vtable: + bool find_restart_any(); + bool find_restart_word(); + bool find_restart_line(); + bool find_restart_buf(); + bool find_restart_lit(); + +private: + // final result structure to be filled in: + match_results& m_result; + // temporary result for POSIX matches: + scoped_ptr > m_temp_match; + // pointer to actual result structure to fill in: + match_results* m_presult; + // start of sequence being searched: + BidiIterator base; + // end of sequence being searched: + BidiIterator last; + // current character being examined: + BidiIterator position; + // where to restart next search after failed match attempt: + BidiIterator restart; + // where the current search started from, acts as base for $` during grep: + BidiIterator search_base; + // how far we can go back when matching lookbehind: + BidiIterator backstop; + // the expression being examined: + const basic_regex& re; + // the expression's traits class: + const ::cutl_details_boost::regex_traits_wrapper& traits_inst; + // the next state in the machine being matched: + const re_syntax_base* pstate; + // matching flags in use: + match_flag_type m_match_flags; + // how many states we have examined so far: + std::ptrdiff_t state_count; + // max number of states to examine before giving up: + std::ptrdiff_t max_state_count; + // whether we should ignore case or not: + bool icase; + // set to true when (position == last), indicates that we may have a partial match: + bool m_has_partial_match; + // set to true whenever we get a match: + bool m_has_found_match; + // set to true whenever we're inside an independent sub-expression: + bool m_independent; + // the current repeat being examined: + repeater_count* next_count; + // the first repeat being examined (top of linked list): + repeater_count rep_obj; + // the mask to pass when matching word boundaries: + typename traits::char_class_type m_word_mask; + // the bitmask to use when determining whether a match_any matches a newline or not: + unsigned char match_any_mask; + // recursion information: + std::vector > recursion_stack; + +#ifdef BOOST_REGEX_NON_RECURSIVE + // + // additional members for non-recursive version: + // + typedef bool (self_type::*unwind_proc_type)(bool); + + void extend_stack(); + bool unwind(bool); + bool unwind_end(bool); + bool unwind_paren(bool); + bool unwind_recursion_stopper(bool); + bool unwind_assertion(bool); + bool unwind_alt(bool); + bool unwind_repeater_counter(bool); + bool unwind_extra_block(bool); + bool unwind_greedy_single_repeat(bool); + bool unwind_slow_dot_repeat(bool); + bool unwind_fast_dot_repeat(bool); + bool unwind_char_repeat(bool); + bool unwind_short_set_repeat(bool); + bool unwind_long_set_repeat(bool); + bool unwind_non_greedy_repeat(bool); + bool unwind_recursion(bool); + bool unwind_recursion_pop(bool); + void destroy_single_repeat(); + void push_matched_paren(int index, const sub_match& sub); + void push_recursion_stopper(); + void push_assertion(const re_syntax_base* ps, bool positive); + void push_alt(const re_syntax_base* ps); + void push_repeater_count(int i, repeater_count** s); + void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id); + void push_non_greedy_repeat(const re_syntax_base* ps); + void push_recursion(int idx, const re_syntax_base* p, results_type* presults); + void push_recursion_pop(); + + // pointer to base of stack: + saved_state* m_stack_base; + // pointer to current stack position: + saved_state* m_backup_state; + // determines what value to return when unwinding from recursion, + // allows for mixed recursive/non-recursive algorithm: + bool m_recursive_result; + // how many memory blocks have we used up?: + unsigned used_block_count; +#endif + + // these operations aren't allowed, so are declared private, + // bodies are provided to keep explicit-instantiation requests happy: + perl_matcher& operator=(const perl_matcher&) + { + return *this; + } + perl_matcher(const perl_matcher& that) + : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {} +}; + +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace re_detail + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +// +// include the implementation of perl_matcher: +// +#ifdef BOOST_REGEX_RECURSIVE +#include +#else +#include +#endif +// this one has to be last: +#include + +#endif + -- cgit v1.1