From b51965dddbed68f23c5e8c169c23c794313ce5f6 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Tue, 28 Jun 2011 17:17:23 +0200 Subject: Add boost subset as an implementation detail --- .../boost/regex/v4/perl_matcher_non_recursive.hpp | 1639 ++++++++++++++++++++ 1 file changed, 1639 insertions(+) create mode 100644 cutl/details/boost/regex/v4/perl_matcher_non_recursive.hpp (limited to 'cutl/details/boost/regex/v4/perl_matcher_non_recursive.hpp') diff --git a/cutl/details/boost/regex/v4/perl_matcher_non_recursive.hpp b/cutl/details/boost/regex/v4/perl_matcher_non_recursive.hpp new file mode 100644 index 0000000..e4b1e1e --- /dev/null +++ b/cutl/details/boost/regex/v4/perl_matcher_non_recursive.hpp @@ -0,0 +1,1639 @@ +/* + * + * Copyright (c) 2002 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE perl_matcher_common.cpp + * VERSION see + * DESCRIPTION: Definitions of perl_matcher member functions that are + * specific to the non-recursive implementation. + */ + +#ifndef BOOST_REGEX_V4_PERL_MATCHER_NON_RECURSIVE_HPP +#define BOOST_REGEX_V4_PERL_MATCHER_NON_RECURSIVE_HPP + +#include + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +#ifdef BOOST_MSVC +# pragma warning(push) +# pragma warning(disable: 4800) +#endif + +namespace cutl_details_boost{ +namespace re_detail{ + +template +inline void inplace_destroy(T* p) +{ + (void)p; // warning suppression + p->~T(); +} + +struct saved_state +{ + union{ + unsigned int state_id; + // this padding ensures correct alignment on 64-bit platforms: + std::size_t padding1; + std::ptrdiff_t padding2; + void* padding3; + }; + saved_state(unsigned i) : state_id(i) {} +}; + +template +struct saved_matched_paren : public saved_state +{ + int index; + sub_match sub; + saved_matched_paren(int i, const sub_match& s) : saved_state(1), index(i), sub(s){}; +}; + +template +struct saved_position : public saved_state +{ + const re_syntax_base* pstate; + BidiIterator position; + saved_position(const re_syntax_base* ps, BidiIterator pos, int i) : saved_state(i), pstate(ps), position(pos){}; +}; + +template +struct saved_assertion : public saved_position +{ + bool positive; + saved_assertion(bool p, const re_syntax_base* ps, BidiIterator pos) + : saved_position(ps, pos, saved_type_assertion), positive(p){}; +}; + +template +struct saved_repeater : public saved_state +{ + repeater_count count; + saved_repeater(int i, repeater_count** s, BidiIterator start) + : saved_state(saved_state_repeater_count), count(i,s,start){} +}; + +struct saved_extra_block : public saved_state +{ + saved_state *base, *end; + saved_extra_block(saved_state* b, saved_state* e) + : saved_state(saved_state_extra_block), base(b), end(e) {} +}; + +struct save_state_init +{ + saved_state** stack; + save_state_init(saved_state** base, saved_state** end) + : stack(base) + { + *base = static_cast(get_mem_block()); + *end = reinterpret_cast(reinterpret_cast(*base)+BOOST_REGEX_BLOCKSIZE); + --(*end); + (void) new (*end)saved_state(0); + BOOST_ASSERT(*end > *base); + } + ~save_state_init() + { + put_mem_block(*stack); + *stack = 0; + } +}; + +template +struct saved_single_repeat : public saved_state +{ + std::size_t count; + const re_repeat* rep; + BidiIterator last_position; + saved_single_repeat(std::size_t c, const re_repeat* r, BidiIterator lp, int arg_id) + : saved_state(arg_id), count(c), rep(r), last_position(lp){} +}; + +template +struct saved_recursion : public saved_state +{ + saved_recursion(int idx, const re_syntax_base* p, Results* pr) + : saved_state(14), recursion_id(idx), preturn_address(p), results(*pr) + {} + int recursion_id; + const re_syntax_base* preturn_address; + Results results; +}; + +template +bool perl_matcher::match_all_states() +{ + static matcher_proc_type const s_match_vtable[30] = + { + (&perl_matcher::match_startmark), + &perl_matcher::match_endmark, + &perl_matcher::match_literal, + &perl_matcher::match_start_line, + &perl_matcher::match_end_line, + &perl_matcher::match_wild, + &perl_matcher::match_match, + &perl_matcher::match_word_boundary, + &perl_matcher::match_within_word, + &perl_matcher::match_word_start, + &perl_matcher::match_word_end, + &perl_matcher::match_buffer_start, + &perl_matcher::match_buffer_end, + &perl_matcher::match_backref, + &perl_matcher::match_long_set, + &perl_matcher::match_set, + &perl_matcher::match_jump, + &perl_matcher::match_alt, + &perl_matcher::match_rep, + &perl_matcher::match_combining, + &perl_matcher::match_soft_buffer_end, + &perl_matcher::match_restart_continue, + // Although this next line *should* be evaluated at compile time, in practice + // some compilers (VC++) emit run-time initialisation which breaks thread + // safety, so use a dispatch function instead: + //(::cutl_details_boost::is_random_access_iterator::value ? &perl_matcher::match_dot_repeat_fast : &perl_matcher::match_dot_repeat_slow), + &perl_matcher::match_dot_repeat_dispatch, + &perl_matcher::match_char_repeat, + &perl_matcher::match_set_repeat, + &perl_matcher::match_long_set_repeat, + &perl_matcher::match_backstep, + &perl_matcher::match_assert_backref, + &perl_matcher::match_toggle_case, + &perl_matcher::match_recursion, + }; + + push_recursion_stopper(); + do{ + while(pstate) + { + matcher_proc_type proc = s_match_vtable[pstate->type]; + ++state_count; + if(!(this->*proc)()) + { + if(state_count > max_state_count) + raise_error(traits_inst, regex_constants::error_complexity); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + bool successful_unwind = unwind(false); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(false == successful_unwind) + return m_recursive_result; + } + } + }while(unwind(true)); + return m_recursive_result; +} + +template +void perl_matcher::extend_stack() +{ + if(used_block_count) + { + --used_block_count; + saved_state* stack_base; + saved_state* backup_state; + stack_base = static_cast(get_mem_block()); + backup_state = reinterpret_cast(reinterpret_cast(stack_base)+BOOST_REGEX_BLOCKSIZE); + saved_extra_block* block = static_cast(backup_state); + --block; + (void) new (block) saved_extra_block(m_stack_base, m_backup_state); + m_stack_base = stack_base; + m_backup_state = block; + } + else + raise_error(traits_inst, regex_constants::error_stack); +} + +template +inline void perl_matcher::push_matched_paren(int index, const sub_match& sub) +{ + //BOOST_ASSERT(index); + saved_matched_paren* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_matched_paren(index, sub); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_recursion_stopper() +{ + saved_state* pmp = m_backup_state; + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = m_backup_state; + --pmp; + } + (void) new (pmp)saved_state(saved_type_recurse); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_assertion(const re_syntax_base* ps, bool positive) +{ + saved_assertion* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_assertion(positive, ps, position); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_alt(const re_syntax_base* ps) +{ + saved_position* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position(ps, position, saved_state_alt); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_non_greedy_repeat(const re_syntax_base* ps) +{ + saved_position* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position(ps, position, saved_state_non_greedy_long_repeat); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_repeater_count(int i, repeater_count** s) +{ + saved_repeater* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_repeater(i, s, position); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_single_repeat(c, r, last_position, state_id); + m_backup_state = pmp; +} + +template +inline void perl_matcher::push_recursion(int idx, const re_syntax_base* p, results_type* presults) +{ + saved_recursion* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_recursion(idx, p, presults); + m_backup_state = pmp; +} + +template +bool perl_matcher::match_startmark() +{ + int index = static_cast(pstate)->index; + icase = static_cast(pstate)->icase; + switch(index) + { + case 0: + pstate = pstate->next.p; + break; + case -1: + case -2: + { + // forward lookahead assert: + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + push_assertion(next_pstate, index == -1); + break; + } + case -3: + { + // independent sub-expression, currently this is always recursive: + bool old_independent = m_independent; + m_independent = true; + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + bool r = match_all_states(); + pstate = next_pstate; + m_independent = old_independent; +#ifdef BOOST_REGEX_MATCH_EXTRA + if(r && (m_match_flags & match_extra)) + { + // + // our captures have been stored in *m_presult + // we need to unpack them, and insert them + // back in the right order when we unwind the stack: + // + match_results temp_match(*m_presult); + unsigned i; + for(i = 0; i < temp_match.size(); ++i) + (*m_presult)[i].get_captures().clear(); + // match everything else: + r = match_all_states(); + // now place the stored captures back: + for(i = 0; i < temp_match.size(); ++i) + { + typedef typename sub_match::capture_sequence_type seq; + seq& s1 = (*m_presult)[i].get_captures(); + const seq& s2 = temp_match[i].captures(); + s1.insert( + s1.end(), + s2.begin(), + s2.end()); + } + } +#endif + return r; + } + case -4: + { + // conditional expression: + const re_alt* alt = static_cast(pstate->next.p); + BOOST_ASSERT(alt->type == syntax_element_alt); + pstate = alt->next.p; + if(pstate->type == syntax_element_assert_backref) + { + if(!match_assert_backref()) + pstate = alt->alt.p; + break; + } + else + { + // zero width assertion, have to match this recursively: + BOOST_ASSERT(pstate->type == syntax_element_startmark); + bool negated = static_cast(pstate)->index == -2; + BidiIterator saved_position = position; + const re_syntax_base* next_pstate = static_cast(pstate->next.p)->alt.p->next.p; + pstate = pstate->next.p->next.p; + bool r = match_all_states(); + position = saved_position; + if(negated) + r = !r; + if(r) + pstate = next_pstate; + else + pstate = alt->alt.p; + break; + } + } + case -5: + { + push_matched_paren(0, (*m_presult)[0]); + m_presult->set_first(position, 0, true); + pstate = pstate->next.p; + break; + } + default: + { + BOOST_ASSERT(index > 0); + if((m_match_flags & match_nosubs) == 0) + { + push_matched_paren(index, (*m_presult)[index]); + m_presult->set_first(position, index); + } + pstate = pstate->next.p; + break; + } + } + return true; +} + +template +bool perl_matcher::match_alt() +{ + bool take_first, take_second; + const re_alt* jmp = static_cast(pstate); + + // find out which of these two alternatives we need to take: + if(position == last) + { + take_first = jmp->can_be_null & mask_take; + take_second = jmp->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, jmp->_map, (unsigned char)mask_take); + take_second = can_start(*position, jmp->_map, (unsigned char)mask_skip); + } + + if(take_first) + { + // we can take the first alternative, + // see if we need to push next alternative: + if(take_second) + { + push_alt(jmp->alt.p); + } + pstate = pstate->next.p; + return true; + } + if(take_second) + { + pstate = jmp->alt.p; + return true; + } + return false; // neither option is possible +} + +template +bool perl_matcher::match_rep() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127 4244) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + + // find out which of these two alternatives we need to take: + bool take_first, take_second; + if(position == last) + { + take_first = rep->can_be_null & mask_take; + take_second = rep->can_be_null & mask_skip; + } + else + { + take_first = can_start(*position, rep->_map, (unsigned char)mask_take); + take_second = can_start(*position, rep->_map, (unsigned char)mask_skip); + } + + if((m_backup_state->state_id != saved_state_repeater_count) + || (static_cast*>(m_backup_state)->count.get_id() != rep->state_id) + || (next_count->get_id() != rep->state_id)) + { + // we're moving to a different repeat from the last + // one, so set up a counter object: + push_repeater_count(rep->state_id, &next_count); + } + // + // If we've had at least one repeat already, and the last one + // matched the NULL string then set the repeat count to + // maximum: + // + next_count->check_null_repeat(position, rep->max); + + if(next_count->get_count() < rep->min) + { + // we must take the repeat: + if(take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + return false; + } + + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // try and take the repeat if we can: + if((next_count->get_count() < rep->max) && take_first) + { + if(take_second) + { + // store position in case we fail: + push_alt(rep->alt.p); + } + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + else if(take_second) + { + pstate = rep->alt.p; + return true; + } + return false; // can't take anything, fail... + } + else // non-greedy + { + // try and skip the repeat if we can: + if(take_second) + { + if((next_count->get_count() < rep->max) && take_first) + { + // store position in case we fail: + push_non_greedy_repeat(rep->next.p); + } + pstate = rep->alt.p; + return true; + } + if((next_count->get_count() < rep->max) && take_first) + { + // increase the counter: + ++(*next_count); + pstate = rep->next.p; + return true; + } + } + return false; +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_dot_repeat_slow() +{ + unsigned count = 0; + const re_repeat* rep = static_cast(pstate); + re_syntax_base* psingle = rep->next.p; + // match compulsary repeats first: + while(count < rep->min) + { + pstate = psingle; + if(!match_wild()) + return false; + ++count; + } + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + if(greedy) + { + // repeat for as long as we can: + while(count < rep->max) + { + pstate = psingle; + if(!match_wild()) + break; + ++count; + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_slow_dot); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +} + +template +bool perl_matcher::match_dot_repeat_fast() +{ + if(m_match_flags & match_not_dot_null) + return match_dot_repeat_slow(); + if((static_cast(pstate->next.p)->mask & match_any_mask) == 0) + return match_dot_repeat_slow(); + + const re_repeat* rep = static_cast(pstate); + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + unsigned count = static_cast((std::min)(static_cast(::cutl_details_boost::re_detail::distance(position, last)), static_cast(greedy ? rep->max : rep->min))); + if(rep->min > count) + { + position = last; + return false; // not enough text left to match + } + std::advance(position, count); + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_fast_dot); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +} + +template +bool perl_matcher::match_char_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + BOOST_ASSERT(1 == static_cast(rep->next.p)->length); + const char_type what = *reinterpret_cast(static_cast(rep->next.p) + 1); + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::cutl_details_boost::is_random_access_iterator::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::cutl_details_boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + } + count = (unsigned)::cutl_details_boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (traits_inst.translate(*position, icase) == what)) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_char); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_set_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + const re_repeat* rep = static_cast(pstate); + const unsigned char* map = static_cast(rep->next.p)->_map; + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::cutl_details_boost::is_random_access_iterator::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::cutl_details_boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + } + count = (unsigned)::cutl_details_boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && map[static_cast(traits_inst.translate(*position, icase))]) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_short_set); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_long_set_repeat() +{ +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable:4127) +#endif +#ifdef __BORLANDC__ +#pragma option push -w-8008 -w-8066 -w-8004 +#endif + typedef typename traits::char_class_type mask_type; + const re_repeat* rep = static_cast(pstate); + const re_set_long* set = static_cast*>(pstate->next.p); + std::size_t count = 0; + // + // start by working out how much we can skip: + // + bool greedy = (rep->greedy) && (!(m_match_flags & regex_constants::match_any) || m_independent); + std::size_t desired = greedy ? rep->max : rep->min; + if(::cutl_details_boost::is_random_access_iterator::value) + { + BidiIterator end = position; + std::advance(end, (std::min)((std::size_t)::cutl_details_boost::re_detail::distance(position, last), desired)); + BidiIterator origin(position); + while((position != end) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + } + count = (unsigned)::cutl_details_boost::re_detail::distance(origin, position); + } + else + { + while((count < desired) && (position != last) && (position != re_is_set_member(position, last, set, re.get_data(), icase))) + { + ++position; + ++count; + } + } + + if(count < rep->min) + return false; + + if(greedy) + { + if((rep->leading) && (count < rep->max)) + restart = position; + // push backtrack info if available: + if(count - rep->min) + push_single_repeat(count, rep, position, saved_state_greedy_single_repeat); + // jump to next state: + pstate = rep->alt.p; + return true; + } + else + { + // non-greedy, push state and return true if we can skip: + if(count < rep->max) + push_single_repeat(count, rep, position, saved_state_rep_long_set); + pstate = rep->alt.p; + return (position == last) ? (rep->can_be_null & mask_skip) : can_start(*position, rep->_map, mask_skip); + } +#ifdef __BORLANDC__ +#pragma option pop +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif +} + +template +bool perl_matcher::match_recursion() +{ + BOOST_ASSERT(pstate->type == syntax_element_recurse); + // + // Backup call stack: + // + push_recursion_pop(); + // + // Set new call stack: + // + if(recursion_stack.capacity() == 0) + { + recursion_stack.reserve(50); + } + recursion_stack.push_back(recursion_info()); + recursion_stack.back().preturn_address = pstate->next.p; + recursion_stack.back().results = *m_presult; + if(static_cast(pstate)->state_id > 0) + { + push_repeater_count(static_cast(pstate)->state_id, &next_count); + } + pstate = static_cast(pstate)->alt.p; + recursion_stack.back().idx = static_cast(pstate)->index; + + return true; +} + +template +bool perl_matcher::match_endmark() +{ + int index = static_cast(pstate)->index; + icase = static_cast(pstate)->icase; + if(index > 0) + { + if((m_match_flags & match_nosubs) == 0) + { + m_presult->set_second(position, index); + } + if(!recursion_stack.empty()) + { + if(index == recursion_stack.back().idx) + { + pstate = recursion_stack.back().preturn_address; + *m_presult = recursion_stack.back().results; + push_recursion(recursion_stack.back().idx, recursion_stack.back().preturn_address, &recursion_stack.back().results); + recursion_stack.pop_back(); + } + } + } + else if((index < 0) && (index != -4)) + { + // matched forward lookahead: + pstate = 0; + return true; + } + pstate = pstate->next.p; + return true; +} + +template +bool perl_matcher::match_match() +{ + if(!recursion_stack.empty()) + { + BOOST_ASSERT(0 == recursion_stack.back().idx); + pstate = recursion_stack.back().preturn_address; + *m_presult = recursion_stack.back().results; + push_recursion(recursion_stack.back().idx, recursion_stack.back().preturn_address, &recursion_stack.back().results); + recursion_stack.pop_back(); + return true; + } + if((m_match_flags & match_not_null) && (position == (*m_presult)[0].first)) + return false; + if((m_match_flags & match_all) && (position != last)) + return false; + if((m_match_flags & regex_constants::match_not_initial_null) && (position == search_base)) + return false; + m_presult->set_second(position); + pstate = 0; + m_has_found_match = true; + if((m_match_flags & match_posix) == match_posix) + { + m_result.maybe_assign(*m_presult); + if((m_match_flags & match_any) == 0) + return false; + } +#ifdef BOOST_REGEX_MATCH_EXTRA + if(match_extra & m_match_flags) + { + for(unsigned i = 0; i < m_presult->size(); ++i) + if((*m_presult)[i].matched) + ((*m_presult)[i]).get_captures().push_back((*m_presult)[i]); + } +#endif + return true; +} + +/**************************************************************************** + +Unwind and associated proceedures follow, these perform what normal stack +unwinding does in the recursive implementation. + +****************************************************************************/ + +template +bool perl_matcher::unwind(bool have_match) +{ + static unwind_proc_type const s_unwind_table[18] = + { + &perl_matcher::unwind_end, + &perl_matcher::unwind_paren, + &perl_matcher::unwind_recursion_stopper, + &perl_matcher::unwind_assertion, + &perl_matcher::unwind_alt, + &perl_matcher::unwind_repeater_counter, + &perl_matcher::unwind_extra_block, + &perl_matcher::unwind_greedy_single_repeat, + &perl_matcher::unwind_slow_dot_repeat, + &perl_matcher::unwind_fast_dot_repeat, + &perl_matcher::unwind_char_repeat, + &perl_matcher::unwind_short_set_repeat, + &perl_matcher::unwind_long_set_repeat, + &perl_matcher::unwind_non_greedy_repeat, + &perl_matcher::unwind_recursion, + &perl_matcher::unwind_recursion_pop, + }; + + m_recursive_result = have_match; + unwind_proc_type unwinder; + bool cont; + // + // keep unwinding our stack until we have something to do: + // + do + { + unwinder = s_unwind_table[m_backup_state->state_id]; + cont = (this->*unwinder)(m_recursive_result); + }while(cont); + // + // return true if we have more states to try: + // + return pstate ? true : false; +} + +template +bool perl_matcher::unwind_end(bool) +{ + pstate = 0; // nothing left to search + return false; // end of stack nothing more to search +} + +template +bool perl_matcher::unwind_paren(bool have_match) +{ + saved_matched_paren* pmp = static_cast*>(m_backup_state); + // restore previous values if no match was found: + if(have_match == false) + { + m_presult->set_first(pmp->sub.first, pmp->index, pmp->index == 0); + m_presult->set_second(pmp->sub.second, pmp->index, pmp->sub.matched, pmp->index == 0); + } +#ifdef BOOST_REGEX_MATCH_EXTRA + // + // we have a match, push the capture information onto the stack: + // + else if(pmp->sub.matched && (match_extra & m_match_flags)) + ((*m_presult)[pmp->index]).get_captures().push_back(pmp->sub); +#endif + // unwind stack: + m_backup_state = pmp+1; + cutl_details_boost::re_detail::inplace_destroy(pmp); + return true; // keep looking +} + +template +bool perl_matcher::unwind_recursion_stopper(bool) +{ + cutl_details_boost::re_detail::inplace_destroy(m_backup_state++); + pstate = 0; // nothing left to search + return false; // end of stack nothing more to search +} + +template +bool perl_matcher::unwind_assertion(bool r) +{ + saved_assertion* pmp = static_cast*>(m_backup_state); + pstate = pmp->pstate; + position = pmp->position; + bool result = (r == pmp->positive); + m_recursive_result = pmp->positive ? r : !r; + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return !result; // return false if the assertion was matched to stop search. +} + +template +bool perl_matcher::unwind_alt(bool r) +{ + saved_position* pmp = static_cast*>(m_backup_state); + if(!r) + { + pstate = pmp->pstate; + position = pmp->position; + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return r; +} + +template +bool perl_matcher::unwind_repeater_counter(bool) +{ + saved_repeater* pmp = static_cast*>(m_backup_state); + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; // keep looking +} + +template +bool perl_matcher::unwind_extra_block(bool) +{ + saved_extra_block* pmp = static_cast(m_backup_state); + void* condemmed = m_stack_base; + m_stack_base = pmp->base; + m_backup_state = pmp->end; + cutl_details_boost::re_detail::inplace_destroy(pmp); + put_mem_block(condemmed); + return true; // keep looking +} + +template +inline void perl_matcher::destroy_single_repeat() +{ + saved_single_repeat* p = static_cast*>(m_backup_state); + cutl_details_boost::re_detail::inplace_destroy(p++); + m_backup_state = p; +} + +template +bool perl_matcher::unwind_greedy_single_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + + count -= rep->min; + + if((m_match_flags & match_partial) && (position == last)) + m_has_partial_match = true; + + BOOST_ASSERT(count); + position = pmp->last_position; + + // backtrack till we can skip out: + do + { + --position; + --count; + ++state_count; + }while(count && !can_start(*position, rep->_map, mask_skip)); + + // if we've hit base, destroy this state: + if(count == 0) + { + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count + rep->min; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_slow_dot_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + BOOST_ASSERT(rep->type == syntax_element_dot_rep); + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + BOOST_ASSERT(rep->next.p->type == syntax_element_wild); + + BOOST_ASSERT(count < rep->max); + pstate = rep->next.p; + position = pmp->last_position; + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(!match_wild()) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_fast_dot_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + + BOOST_ASSERT(count < rep->max); + position = pmp->last_position; + if(position != last) + { + + // wind forward until we can skip out of the repeat: + do + { + ++position; + ++count; + ++state_count; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_char_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const char_type what = *reinterpret_cast(static_cast(pstate) + 1); + position = pmp->last_position; + + BOOST_ASSERT(rep->type == syntax_element_char_rep); + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + BOOST_ASSERT(rep->next.p->type == syntax_element_literal); + BOOST_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(traits_inst.translate(*position, icase) != what) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++ position; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_short_set_repeat(bool r) +{ + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const unsigned char* map = static_cast(rep->next.p)->_map; + position = pmp->last_position; + + BOOST_ASSERT(rep->type == syntax_element_short_set_rep); + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + BOOST_ASSERT(rep->next.p->type == syntax_element_set); + BOOST_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(!map[static_cast(traits_inst.translate(*position, icase))]) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++count; + ++ position; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_long_set_repeat(bool r) +{ + typedef typename traits::char_class_type mask_type; + saved_single_repeat* pmp = static_cast*>(m_backup_state); + + // if we have a match, just discard this state: + if(r) + { + destroy_single_repeat(); + return true; + } + + const re_repeat* rep = pmp->rep; + std::size_t count = pmp->count; + pstate = rep->next.p; + const re_set_long* set = static_cast*>(pstate); + position = pmp->last_position; + + BOOST_ASSERT(rep->type == syntax_element_long_set_rep); + BOOST_ASSERT(rep->next.p != 0); + BOOST_ASSERT(rep->alt.p != 0); + BOOST_ASSERT(rep->next.p->type == syntax_element_long_set); + BOOST_ASSERT(count < rep->max); + + if(position != last) + { + // wind forward until we can skip out of the repeat: + do + { + if(position == re_is_set_member(position, last, set, re.get_data(), icase)) + { + // failed repeat match, discard this state and look for another: + destroy_single_repeat(); + return true; + } + ++position; + ++count; + ++state_count; + pstate = rep->next.p; + }while((count < rep->max) && (position != last) && !can_start(*position, rep->_map, mask_skip)); + } + // remember where we got to if this is a leading repeat: + if((rep->leading) && (count < rep->max)) + restart = position; + if(position == last) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if((m_match_flags & match_partial) && (position == last) && (position != search_base)) + m_has_partial_match = true; + if(0 == (rep->can_be_null & mask_skip)) + return true; + } + else if(count == rep->max) + { + // can't repeat any more, remove the pushed state: + destroy_single_repeat(); + if(!can_start(*position, rep->_map, mask_skip)) + return true; + } + else + { + pmp->count = count; + pmp->last_position = position; + } + pstate = rep->alt.p; + return false; +} + +template +bool perl_matcher::unwind_non_greedy_repeat(bool r) +{ + saved_position* pmp = static_cast*>(m_backup_state); + if(!r) + { + position = pmp->position; + pstate = pmp->pstate; + ++(*next_count); + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return r; +} + +template +bool perl_matcher::unwind_recursion(bool r) +{ + saved_recursion* pmp = static_cast*>(m_backup_state); + if(!r) + { + recursion_stack.push_back(recursion_info()); + recursion_stack.back().idx = pmp->recursion_id; + recursion_stack.back().preturn_address = pmp->preturn_address; + recursion_stack.back().results = pmp->results; + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +bool perl_matcher::unwind_recursion_pop(bool r) +{ + saved_state* pmp = static_cast(m_backup_state); + if(!r) + { + recursion_stack.pop_back(); + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +void perl_matcher::push_recursion_pop() +{ + saved_state* pmp = static_cast(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast(m_backup_state); + --pmp; + } + (void) new (pmp)saved_state(15); + m_backup_state = pmp; +} +/* +template +bool perl_matcher::unwind_parenthesis_pop(bool r) +{ + saved_state* pmp = static_cast(m_backup_state); + if(!r) + { + --parenthesis_stack_position; + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +void perl_matcher::push_parenthesis_pop() +{ + saved_state* pmp = static_cast(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast(m_backup_state); + --pmp; + } + (void) new (pmp)saved_state(16); + m_backup_state = pmp; +} + +template +bool perl_matcher::unwind_parenthesis_push(bool r) +{ + saved_position* pmp = static_cast*>(m_backup_state); + if(!r) + { + parenthesis_stack[parenthesis_stack_position++] = pmp->position; + } + cutl_details_boost::re_detail::inplace_destroy(pmp++); + m_backup_state = pmp; + return true; +} + +template +inline void perl_matcher::push_parenthesis_push(BidiIterator p) +{ + saved_position* pmp = static_cast*>(m_backup_state); + --pmp; + if(pmp < m_stack_base) + { + extend_stack(); + pmp = static_cast*>(m_backup_state); + --pmp; + } + (void) new (pmp)saved_position(0, p, 17); + m_backup_state = pmp; +} +*/ +} // namespace re_detail +} // namespace cutl_details_boost + +#ifdef BOOST_MSVC +# pragma warning(pop) +#endif + +#ifdef BOOST_MSVC +#pragma warning(push) +#pragma warning(disable: 4103) +#endif +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif +#ifdef BOOST_MSVC +#pragma warning(pop) +#endif + +#endif + + -- cgit v1.1