From 8e761289a2446367267c6c0d9a26e734f0f78306 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 16 Dec 2020 20:29:05 +0300 Subject: Get rid of legacy build systems and rename cutl/ to libcutl/ --- cutl/details/boost/regex/pending/object_cache.hpp | 165 ----- cutl/details/boost/regex/pending/static_mutex.hpp | 179 ----- .../boost/regex/pending/unicode_iterator.hpp | 776 --------------------- 3 files changed, 1120 deletions(-) delete mode 100644 cutl/details/boost/regex/pending/object_cache.hpp delete mode 100644 cutl/details/boost/regex/pending/static_mutex.hpp delete mode 100644 cutl/details/boost/regex/pending/unicode_iterator.hpp (limited to 'cutl/details/boost/regex/pending') diff --git a/cutl/details/boost/regex/pending/object_cache.hpp b/cutl/details/boost/regex/pending/object_cache.hpp deleted file mode 100644 index 2bf2754..0000000 --- a/cutl/details/boost/regex/pending/object_cache.hpp +++ /dev/null @@ -1,165 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE object_cache.hpp - * VERSION see - * DESCRIPTION: Implements a generic object cache. - */ - -#ifndef BOOST_REGEX_OBJECT_CACHE_HPP -#define BOOST_REGEX_OBJECT_CACHE_HPP - -#include -#include -#include -#include -#include -#include -#ifdef BOOST_HAS_THREADS -#include -#endif - -namespace cutl_details_boost{ - -template -class object_cache -{ -public: - typedef std::pair< ::cutl_details_boost::shared_ptr, Key const*> value_type; - typedef std::list list_type; - typedef typename list_type::iterator list_iterator; - typedef std::map map_type; - typedef typename map_type::iterator map_iterator; - typedef typename list_type::size_type size_type; - static cutl_details_boost::shared_ptr get(const Key& k, size_type l_max_cache_size); - -private: - static cutl_details_boost::shared_ptr do_get(const Key& k, size_type l_max_cache_size); - - struct data - { - list_type cont; - map_type index; - }; - - // Needed by compilers not implementing the resolution to DR45. For reference, - // see http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#45. - friend struct data; -}; - -template -cutl_details_boost::shared_ptr object_cache::get(const Key& k, size_type l_max_cache_size) -{ -#ifdef BOOST_HAS_THREADS - static cutl_details_boost::static_mutex mut = BOOST_STATIC_MUTEX_INIT; - - cutl_details_boost::static_mutex::scoped_lock l(mut); - if(l) - { - return do_get(k, l_max_cache_size); - } - // - // what do we do if the lock fails? - // for now just throw, but we should never really get here... - // - ::cutl_details_boost::throw_exception(std::runtime_error("Error in thread safety code: could not acquire a lock")); -#if defined(BOOST_NO_UNREACHABLE_RETURN_DETECTION) || defined(BOOST_NO_EXCEPTIONS) - return cutl_details_boost::shared_ptr(); -#endif -#else - return do_get(k, l_max_cache_size); -#endif -} - -template -cutl_details_boost::shared_ptr object_cache::do_get(const Key& k, size_type l_max_cache_size) -{ - typedef typename object_cache::data object_data; - typedef typename map_type::size_type map_size_type; - static object_data s_data; - - // - // see if the object is already in the cache: - // - map_iterator mpos = s_data.index.find(k); - if(mpos != s_data.index.end()) - { - // - // Eureka! - // We have a cached item, bump it up the list and return it: - // - if(--(s_data.cont.end()) != mpos->second) - { - // splice out the item we want to move: - list_type temp; - temp.splice(temp.end(), s_data.cont, mpos->second); - // and now place it at the end of the list: - s_data.cont.splice(s_data.cont.end(), temp, temp.begin()); - BOOST_ASSERT(*(s_data.cont.back().second) == k); - // update index with new position: - mpos->second = --(s_data.cont.end()); - BOOST_ASSERT(&(mpos->first) == mpos->second->second); - BOOST_ASSERT(&(mpos->first) == s_data.cont.back().second); - } - return s_data.cont.back().first; - } - // - // if we get here then the item is not in the cache, - // so create it: - // - cutl_details_boost::shared_ptr result(new Object(k)); - // - // Add it to the list, and index it: - // - s_data.cont.push_back(value_type(result, static_cast(0))); - s_data.index.insert(std::make_pair(k, --(s_data.cont.end()))); - s_data.cont.back().second = &(s_data.index.find(k)->first); - map_size_type s = s_data.index.size(); - BOOST_ASSERT(s_data.index[k]->first.get() == result.get()); - BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); - BOOST_ASSERT(s_data.index.find(k)->first == k); - if(s > l_max_cache_size) - { - // - // We have too many items in the list, so we need to start - // popping them off the back of the list, but only if they're - // being held uniquely by us: - // - list_iterator pos = s_data.cont.begin(); - list_iterator last = s_data.cont.end(); - while((pos != last) && (s > l_max_cache_size)) - { - if(pos->first.unique()) - { - list_iterator condemmed(pos); - ++pos; - // now remove the items from our containers, - // then order has to be as follows: - BOOST_ASSERT(s_data.index.find(*(condemmed->second)) != s_data.index.end()); - s_data.index.erase(*(condemmed->second)); - s_data.cont.erase(condemmed); - --s; - } - else - ++pos; - } - BOOST_ASSERT(s_data.index[k]->first.get() == result.get()); - BOOST_ASSERT(&(s_data.index.find(k)->first) == s_data.cont.back().second); - BOOST_ASSERT(s_data.index.find(k)->first == k); - } - return result; -} - -} - -#endif diff --git a/cutl/details/boost/regex/pending/static_mutex.hpp b/cutl/details/boost/regex/pending/static_mutex.hpp deleted file mode 100644 index d3a0307..0000000 --- a/cutl/details/boost/regex/pending/static_mutex.hpp +++ /dev/null @@ -1,179 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE static_mutex.hpp - * VERSION see - * DESCRIPTION: Declares static_mutex lock type, there are three different - * implementations: POSIX pthreads, WIN32 threads, and portable, - * these are described in more detail below. - */ - -#ifndef BOOST_REGEX_STATIC_MUTEX_HPP -#define BOOST_REGEX_STATIC_MUTEX_HPP - -#include -#include // dll import/export options. - -#ifdef BOOST_HAS_PTHREADS -#include -#endif - -#if defined(BOOST_HAS_PTHREADS) && defined(PTHREAD_MUTEX_INITIALIZER) -// -// pthreads version: -// simple wrap around a pthread_mutex_t initialized with -// PTHREAD_MUTEX_INITIALIZER. -// -namespace cutl_details_boost{ - -class static_mutex; - -#define BOOST_STATIC_MUTEX_INIT { PTHREAD_MUTEX_INITIALIZER, } - -class BOOST_REGEX_DECL scoped_static_mutex_lock -{ -public: - scoped_static_mutex_lock(static_mutex& mut, bool lk = true); - ~scoped_static_mutex_lock(); - inline bool locked()const - { - return m_have_lock; - } - inline operator void const*()const - { - return locked() ? this : 0; - } - void lock(); - void unlock(); -private: - static_mutex& m_mutex; - bool m_have_lock; -}; - -class static_mutex -{ -public: - typedef scoped_static_mutex_lock scoped_lock; - pthread_mutex_t m_mutex; -}; - -} // namespace cutl_details_boost -#elif defined(BOOST_HAS_WINTHREADS) -// -// Win32 version: -// Use a 32-bit int as a lock, along with a test-and-set -// implementation using InterlockedCompareExchange. -// - -#include - -namespace cutl_details_boost{ - -class BOOST_REGEX_DECL scoped_static_mutex_lock; - -class static_mutex -{ -public: - typedef scoped_static_mutex_lock scoped_lock; - cutl_details_boost::int32_t m_mutex; -}; - -#define BOOST_STATIC_MUTEX_INIT { 0, } - -class BOOST_REGEX_DECL scoped_static_mutex_lock -{ -public: - scoped_static_mutex_lock(static_mutex& mut, bool lk = true); - ~scoped_static_mutex_lock(); - operator void const*()const - { - return locked() ? this : 0; - } - bool locked()const - { - return m_have_lock; - } - void lock(); - void unlock(); -private: - static_mutex& m_mutex; - bool m_have_lock; - scoped_static_mutex_lock(const scoped_static_mutex_lock&); - scoped_static_mutex_lock& operator=(const scoped_static_mutex_lock&); -}; - -} // namespace - -#else -// -// Portable version of a static mutex based on Boost.Thread library: -// This has to use a single mutex shared by all instances of static_mutex -// because cutl_details_boost::call_once doesn't alow us to pass instance information -// down to the initialisation proceedure. In fact the initialisation routine -// may need to be called more than once - but only once per instance. -// -// Since this preprocessor path is almost never taken, we hide these header -// dependencies so that build tools don't find them. -// -#define B1 -#define B2 -#include B1 -#include B2 -#undef B1 -#undef B2 - -namespace cutl_details_boost{ - -class BOOST_REGEX_DECL scoped_static_mutex_lock; -extern "C" BOOST_REGEX_DECL void cutl_details_boost_regex_free_static_mutex(); - -class BOOST_REGEX_DECL static_mutex -{ -public: - typedef scoped_static_mutex_lock scoped_lock; - static void init(); - static cutl_details_boost::recursive_mutex* m_pmutex; - static cutl_details_boost::once_flag m_once; -}; - -#define BOOST_STATIC_MUTEX_INIT { } - -class BOOST_REGEX_DECL scoped_static_mutex_lock -{ -public: - scoped_static_mutex_lock(static_mutex& mut, bool lk = true); - ~scoped_static_mutex_lock(); - operator void const*()const; - bool locked()const; - void lock(); - void unlock(); -private: - cutl_details_boost::recursive_mutex::scoped_lock* m_plock; - bool m_have_lock; -}; - -inline scoped_static_mutex_lock::operator void const*()const -{ - return locked() ? this : 0; -} - -inline bool scoped_static_mutex_lock::locked()const -{ - return m_have_lock; -} - -} // namespace - -#endif - -#endif diff --git a/cutl/details/boost/regex/pending/unicode_iterator.hpp b/cutl/details/boost/regex/pending/unicode_iterator.hpp deleted file mode 100644 index 467afc7..0000000 --- a/cutl/details/boost/regex/pending/unicode_iterator.hpp +++ /dev/null @@ -1,776 +0,0 @@ -/* - * - * Copyright (c) 2004 - * John Maddock - * - * Use, modification and distribution are subject to the - * Boost Software License, Version 1.0. (See accompanying file - * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - * - */ - - /* - * LOCATION: see http://www.boost.org for most recent version. - * FILE unicode_iterator.hpp - * VERSION see - * DESCRIPTION: Iterator adapters for converting between different Unicode encodings. - */ - -/**************************************************************************** - -Contents: -~~~~~~~~~ - -1) Read Only, Input Adapters: -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -template -class u32_to_u8_iterator; - -Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-8. - -template -class u8_to_u32_iterator; - -Adapts sequence of UTF-8 code points to "look like" a sequence of UTF-32. - -template -class u32_to_u16_iterator; - -Adapts sequence of UTF-32 code points to "look like" a sequence of UTF-16. - -template -class u16_to_u32_iterator; - -Adapts sequence of UTF-16 code points to "look like" a sequence of UTF-32. - -2) Single pass output iterator adapters: - -template -class utf8_output_iterator; - -Accepts UTF-32 code points and forwards them on as UTF-8 code points. - -template -class utf16_output_iterator; - -Accepts UTF-32 code points and forwards them on as UTF-16 code points. - -****************************************************************************/ - -#ifndef BOOST_REGEX_UNICODE_ITERATOR_HPP -#define BOOST_REGEX_UNICODE_ITERATOR_HPP -#include -#include -#include -#include -#include -#include -#ifndef BOOST_NO_STD_LOCALE -#include -#include -#endif -#include // CHAR_BIT - -namespace cutl_details_boost{ - -namespace detail{ - -static const ::cutl_details_boost::uint16_t high_surrogate_base = 0xD7C0u; -static const ::cutl_details_boost::uint16_t low_surrogate_base = 0xDC00u; -static const ::cutl_details_boost::uint32_t ten_bit_mask = 0x3FFu; - -inline bool is_high_surrogate(::cutl_details_boost::uint16_t v) -{ - return (v & 0xFFFFFC00u) == 0xd800u; -} -inline bool is_low_surrogate(::cutl_details_boost::uint16_t v) -{ - return (v & 0xFFFFFC00u) == 0xdc00u; -} -template -inline bool is_surrogate(T v) -{ - return (v & 0xFFFFF800u) == 0xd800; -} - -inline unsigned utf8_byte_count(cutl_details_boost::uint8_t c) -{ - // if the most significant bit with a zero in it is in position - // 8-N then there are N bytes in this UTF-8 sequence: - cutl_details_boost::uint8_t mask = 0x80u; - unsigned result = 0; - while(c & mask) - { - ++result; - mask >>= 1; - } - return (result == 0) ? 1 : ((result > 4) ? 4 : result); -} - -inline unsigned utf8_trailing_byte_count(cutl_details_boost::uint8_t c) -{ - return utf8_byte_count(c) - 1; -} - -#ifdef BOOST_MSVC -#pragma warning(push) -#pragma warning(disable:4100) -#endif -inline void invalid_utf32_code_point(::cutl_details_boost::uint32_t val) -{ -#ifndef BOOST_NO_STD_LOCALE - std::stringstream ss; - ss << "Invalid UTF-32 code point U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-16 sequence"; - std::out_of_range e(ss.str()); -#else - std::out_of_range e("Invalid UTF-32 code point encountered while trying to encode UTF-16 sequence"); -#endif - cutl_details_boost::throw_exception(e); -} -#ifdef BOOST_MSVC -#pragma warning(pop) -#endif - - -} // namespace detail - -template -class u32_to_u16_iterator - : public cutl_details_boost::iterator_facade, U16Type, std::bidirectional_iterator_tag, const U16Type> -{ - typedef cutl_details_boost::iterator_facade, U16Type, std::bidirectional_iterator_tag, const U16Type> base_type; - -#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) - typedef typename std::iterator_traits::value_type base_value_type; - - BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); - BOOST_STATIC_ASSERT(sizeof(U16Type)*CHAR_BIT == 16); -#endif - -public: - typename base_type::reference - dereference()const - { - if(m_current == 2) - extract_current(); - return m_values[m_current]; - } - bool equal(const u32_to_u16_iterator& that)const - { - if(m_position == that.m_position) - { - // Both m_currents must be equal, or both even - // this is the same as saying their sum must be even: - return (m_current + that.m_current) & 1u ? false : true; - } - return false; - } - void increment() - { - // if we have a pending read then read now, so that we know whether - // to skip a position, or move to a low-surrogate: - if(m_current == 2) - { - // pending read: - extract_current(); - } - // move to the next surrogate position: - ++m_current; - // if we've reached the end skip a position: - if(m_values[m_current] == 0) - { - m_current = 2; - ++m_position; - } - } - void decrement() - { - if(m_current != 1) - { - // decrementing an iterator always leads to a valid position: - --m_position; - extract_current(); - m_current = m_values[1] ? 1 : 0; - } - else - { - m_current = 0; - } - } - BaseIterator base()const - { - return m_position; - } - // construct: - u32_to_u16_iterator() : m_position(), m_current(0) - { - m_values[0] = 0; - m_values[1] = 0; - m_values[2] = 0; - } - u32_to_u16_iterator(BaseIterator b) : m_position(b), m_current(2) - { - m_values[0] = 0; - m_values[1] = 0; - m_values[2] = 0; - } -private: - - void extract_current()const - { - // begin by checking for a code point out of range: - ::cutl_details_boost::uint32_t v = *m_position; - if(v >= 0x10000u) - { - if(v > 0x10FFFFu) - detail::invalid_utf32_code_point(*m_position); - // split into two surrogates: - m_values[0] = static_cast(v >> 10) + detail::high_surrogate_base; - m_values[1] = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; - m_current = 0; - BOOST_ASSERT(detail::is_high_surrogate(m_values[0])); - BOOST_ASSERT(detail::is_low_surrogate(m_values[1])); - } - else - { - // 16-bit code point: - m_values[0] = static_cast(*m_position); - m_values[1] = 0; - m_current = 0; - // value must not be a surrogate: - if(detail::is_surrogate(m_values[0])) - detail::invalid_utf32_code_point(*m_position); - } - } - BaseIterator m_position; - mutable U16Type m_values[3]; - mutable unsigned m_current; -}; - -template -class u16_to_u32_iterator - : public cutl_details_boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> -{ - typedef cutl_details_boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type; - // special values for pending iterator reads: - BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu); - -#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) - typedef typename std::iterator_traits::value_type base_value_type; - - BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 16); - BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32); -#endif - -public: - typename base_type::reference - dereference()const - { - if(m_value == pending_read) - extract_current(); - return m_value; - } - bool equal(const u16_to_u32_iterator& that)const - { - return m_position == that.m_position; - } - void increment() - { - // skip high surrogate first if there is one: - if(detail::is_high_surrogate(*m_position)) ++m_position; - ++m_position; - m_value = pending_read; - } - void decrement() - { - --m_position; - // if we have a low surrogate then go back one more: - if(detail::is_low_surrogate(*m_position)) - --m_position; - m_value = pending_read; - } - BaseIterator base()const - { - return m_position; - } - // construct: - u16_to_u32_iterator() : m_position() - { - m_value = pending_read; - } - u16_to_u32_iterator(BaseIterator b) : m_position(b) - { - m_value = pending_read; - } - // - // Range checked version: - // - u16_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) - { - m_value = pending_read; - // - // The range must not start with a low surrogate, or end in a high surrogate, - // otherwise we run the risk of running outside the underlying input range. - // Likewise b must not be located at a low surrogate. - // - cutl_details_boost::uint16_t val; - if(start != end) - { - if((b != start) && (b != end)) - { - val = *b; - if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) - invalid_code_point(val); - } - val = *start; - if(detail::is_surrogate(val) && ((val & 0xFC00u) == 0xDC00u)) - invalid_code_point(val); - val = *--end; - if(detail::is_high_surrogate(val)) - invalid_code_point(val); - } - } -private: - static void invalid_code_point(::cutl_details_boost::uint16_t val) - { -#ifndef BOOST_NO_STD_LOCALE - std::stringstream ss; - ss << "Misplaced UTF-16 surrogate U+" << std::showbase << std::hex << val << " encountered while trying to encode UTF-32 sequence"; - std::out_of_range e(ss.str()); -#else - std::out_of_range e("Misplaced UTF-16 surrogate encountered while trying to encode UTF-32 sequence"); -#endif - cutl_details_boost::throw_exception(e); - } - void extract_current()const - { - m_value = static_cast(static_cast< ::cutl_details_boost::uint16_t>(*m_position)); - // if the last value is a high surrogate then adjust m_position and m_value as needed: - if(detail::is_high_surrogate(*m_position)) - { - // precondition; next value must have be a low-surrogate: - BaseIterator next(m_position); - ::cutl_details_boost::uint16_t t = *++next; - if((t & 0xFC00u) != 0xDC00u) - invalid_code_point(t); - m_value = (m_value - detail::high_surrogate_base) << 10; - m_value |= (static_cast(static_cast< ::cutl_details_boost::uint16_t>(t)) & detail::ten_bit_mask); - } - // postcondition; result must not be a surrogate: - if(detail::is_surrogate(m_value)) - invalid_code_point(static_cast< ::cutl_details_boost::uint16_t>(m_value)); - } - BaseIterator m_position; - mutable U32Type m_value; -}; - -template -class u32_to_u8_iterator - : public cutl_details_boost::iterator_facade, U8Type, std::bidirectional_iterator_tag, const U8Type> -{ - typedef cutl_details_boost::iterator_facade, U8Type, std::bidirectional_iterator_tag, const U8Type> base_type; - -#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) - typedef typename std::iterator_traits::value_type base_value_type; - - BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 32); - BOOST_STATIC_ASSERT(sizeof(U8Type)*CHAR_BIT == 8); -#endif - -public: - typename base_type::reference - dereference()const - { - if(m_current == 4) - extract_current(); - return m_values[m_current]; - } - bool equal(const u32_to_u8_iterator& that)const - { - if(m_position == that.m_position) - { - // either the m_current's must be equal, or one must be 0 and - // the other 4: which means neither must have bits 1 or 2 set: - return (m_current == that.m_current) - || (((m_current | that.m_current) & 3) == 0); - } - return false; - } - void increment() - { - // if we have a pending read then read now, so that we know whether - // to skip a position, or move to a low-surrogate: - if(m_current == 4) - { - // pending read: - extract_current(); - } - // move to the next surrogate position: - ++m_current; - // if we've reached the end skip a position: - if(m_values[m_current] == 0) - { - m_current = 4; - ++m_position; - } - } - void decrement() - { - if((m_current & 3) == 0) - { - --m_position; - extract_current(); - m_current = 3; - while(m_current && (m_values[m_current] == 0)) - --m_current; - } - else - --m_current; - } - BaseIterator base()const - { - return m_position; - } - // construct: - u32_to_u8_iterator() : m_position(), m_current(0) - { - m_values[0] = 0; - m_values[1] = 0; - m_values[2] = 0; - m_values[3] = 0; - m_values[4] = 0; - } - u32_to_u8_iterator(BaseIterator b) : m_position(b), m_current(4) - { - m_values[0] = 0; - m_values[1] = 0; - m_values[2] = 0; - m_values[3] = 0; - m_values[4] = 0; - } -private: - - void extract_current()const - { - cutl_details_boost::uint32_t c = *m_position; - if(c > 0x10FFFFu) - detail::invalid_utf32_code_point(c); - if(c < 0x80u) - { - m_values[0] = static_cast(c); - m_values[1] = static_cast(0u); - m_values[2] = static_cast(0u); - m_values[3] = static_cast(0u); - } - else if(c < 0x800u) - { - m_values[0] = static_cast(0xC0u + (c >> 6)); - m_values[1] = static_cast(0x80u + (c & 0x3Fu)); - m_values[2] = static_cast(0u); - m_values[3] = static_cast(0u); - } - else if(c < 0x10000u) - { - m_values[0] = static_cast(0xE0u + (c >> 12)); - m_values[1] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); - m_values[2] = static_cast(0x80u + (c & 0x3Fu)); - m_values[3] = static_cast(0u); - } - else - { - m_values[0] = static_cast(0xF0u + (c >> 18)); - m_values[1] = static_cast(0x80u + ((c >> 12) & 0x3Fu)); - m_values[2] = static_cast(0x80u + ((c >> 6) & 0x3Fu)); - m_values[3] = static_cast(0x80u + (c & 0x3Fu)); - } - m_current= 0; - } - BaseIterator m_position; - mutable U8Type m_values[5]; - mutable unsigned m_current; -}; - -template -class u8_to_u32_iterator - : public cutl_details_boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> -{ - typedef cutl_details_boost::iterator_facade, U32Type, std::bidirectional_iterator_tag, const U32Type> base_type; - // special values for pending iterator reads: - BOOST_STATIC_CONSTANT(U32Type, pending_read = 0xffffffffu); - -#if !defined(BOOST_NO_STD_ITERATOR_TRAITS) && !defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION) - typedef typename std::iterator_traits::value_type base_value_type; - - BOOST_STATIC_ASSERT(sizeof(base_value_type)*CHAR_BIT == 8); - BOOST_STATIC_ASSERT(sizeof(U32Type)*CHAR_BIT == 32); -#endif - -public: - typename base_type::reference - dereference()const - { - if(m_value == pending_read) - extract_current(); - return m_value; - } - bool equal(const u8_to_u32_iterator& that)const - { - return m_position == that.m_position; - } - void increment() - { - // We must not start with a continuation character: - if((static_cast(*m_position) & 0xC0) == 0x80) - invalid_sequence(); - // skip high surrogate first if there is one: - unsigned c = detail::utf8_byte_count(*m_position); - if(m_value == pending_read) - { - // Since we haven't read in a value, we need to validate the code points: - for(unsigned i = 0; i < c; ++i) - { - ++m_position; - // We must have a continuation byte: - if((i != c - 1) && ((static_cast(*m_position) & 0xC0) != 0x80)) - invalid_sequence(); - } - } - else - { - std::advance(m_position, c); - } - m_value = pending_read; - } - void decrement() - { - // Keep backtracking until we don't have a trailing character: - unsigned count = 0; - while((*--m_position & 0xC0u) == 0x80u) ++count; - // now check that the sequence was valid: - if(count != detail::utf8_trailing_byte_count(*m_position)) - invalid_sequence(); - m_value = pending_read; - } - BaseIterator base()const - { - return m_position; - } - // construct: - u8_to_u32_iterator() : m_position() - { - m_value = pending_read; - } - u8_to_u32_iterator(BaseIterator b) : m_position(b) - { - m_value = pending_read; - } - // - // Checked constructor: - // - u8_to_u32_iterator(BaseIterator b, BaseIterator start, BaseIterator end) : m_position(b) - { - m_value = pending_read; - // - // We must not start with a continuation character, or end with a - // truncated UTF-8 sequence otherwise we run the risk of going past - // the start/end of the underlying sequence: - // - if(start != end) - { - unsigned char v = *start; - if((v & 0xC0u) == 0x80u) - invalid_sequence(); - if((b != start) && (b != end) && ((*b & 0xC0u) == 0x80u)) - invalid_sequence(); - BaseIterator pos = end; - do - { - v = *--pos; - } - while((start != pos) && ((v & 0xC0u) == 0x80u)); - std::ptrdiff_t extra = detail::utf8_byte_count(v); - if(std::distance(pos, end) < extra) - invalid_sequence(); - } - } -private: - static void invalid_sequence() - { - std::out_of_range e("Invalid UTF-8 sequence encountered while trying to encode UTF-32 character"); - cutl_details_boost::throw_exception(e); - } - void extract_current()const - { - m_value = static_cast(static_cast< ::cutl_details_boost::uint8_t>(*m_position)); - // we must not have a continuation character: - if((m_value & 0xC0u) == 0x80u) - invalid_sequence(); - // see how many extra bytes we have: - unsigned extra = detail::utf8_trailing_byte_count(*m_position); - // extract the extra bits, 6 from each extra byte: - BaseIterator next(m_position); - for(unsigned c = 0; c < extra; ++c) - { - ++next; - m_value <<= 6; - // We must have a continuation byte: - if((static_cast(*next) & 0xC0) != 0x80) - invalid_sequence(); - m_value += static_cast(*next) & 0x3Fu; - } - // we now need to remove a few of the leftmost bits, but how many depends - // upon how many extra bytes we've extracted: - static const cutl_details_boost::uint32_t masks[4] = - { - 0x7Fu, - 0x7FFu, - 0xFFFFu, - 0x1FFFFFu, - }; - m_value &= masks[extra]; - // check the result: - if(m_value > static_cast(0x10FFFFu)) - invalid_sequence(); - } - BaseIterator m_position; - mutable U32Type m_value; -}; - -template -class utf16_output_iterator -{ -public: - typedef void difference_type; - typedef void value_type; - typedef cutl_details_boost::uint32_t* pointer; - typedef cutl_details_boost::uint32_t& reference; - typedef std::output_iterator_tag iterator_category; - - utf16_output_iterator(const BaseIterator& b) - : m_position(b){} - utf16_output_iterator(const utf16_output_iterator& that) - : m_position(that.m_position){} - utf16_output_iterator& operator=(const utf16_output_iterator& that) - { - m_position = that.m_position; - return *this; - } - const utf16_output_iterator& operator*()const - { - return *this; - } - void operator=(cutl_details_boost::uint32_t val)const - { - push(val); - } - utf16_output_iterator& operator++() - { - return *this; - } - utf16_output_iterator& operator++(int) - { - return *this; - } - BaseIterator base()const - { - return m_position; - } -private: - void push(cutl_details_boost::uint32_t v)const - { - if(v >= 0x10000u) - { - // begin by checking for a code point out of range: - if(v > 0x10FFFFu) - detail::invalid_utf32_code_point(v); - // split into two surrogates: - *m_position++ = static_cast(v >> 10) + detail::high_surrogate_base; - *m_position++ = static_cast(v & detail::ten_bit_mask) + detail::low_surrogate_base; - } - else - { - // 16-bit code point: - // value must not be a surrogate: - if(detail::is_surrogate(v)) - detail::invalid_utf32_code_point(v); - *m_position++ = static_cast(v); - } - } - mutable BaseIterator m_position; -}; - -template -class utf8_output_iterator -{ -public: - typedef void difference_type; - typedef void value_type; - typedef cutl_details_boost::uint32_t* pointer; - typedef cutl_details_boost::uint32_t& reference; - typedef std::output_iterator_tag iterator_category; - - utf8_output_iterator(const BaseIterator& b) - : m_position(b){} - utf8_output_iterator(const utf8_output_iterator& that) - : m_position(that.m_position){} - utf8_output_iterator& operator=(const utf8_output_iterator& that) - { - m_position = that.m_position; - return *this; - } - const utf8_output_iterator& operator*()const - { - return *this; - } - void operator=(cutl_details_boost::uint32_t val)const - { - push(val); - } - utf8_output_iterator& operator++() - { - return *this; - } - utf8_output_iterator& operator++(int) - { - return *this; - } - BaseIterator base()const - { - return m_position; - } -private: - void push(cutl_details_boost::uint32_t c)const - { - if(c > 0x10FFFFu) - detail::invalid_utf32_code_point(c); - if(c < 0x80u) - { - *m_position++ = static_cast(c); - } - else if(c < 0x800u) - { - *m_position++ = static_cast(0xC0u + (c >> 6)); - *m_position++ = static_cast(0x80u + (c & 0x3Fu)); - } - else if(c < 0x10000u) - { - *m_position++ = static_cast(0xE0u + (c >> 12)); - *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); - *m_position++ = static_cast(0x80u + (c & 0x3Fu)); - } - else - { - *m_position++ = static_cast(0xF0u + (c >> 18)); - *m_position++ = static_cast(0x80u + ((c >> 12) & 0x3Fu)); - *m_position++ = static_cast(0x80u + ((c >> 6) & 0x3Fu)); - *m_position++ = static_cast(0x80u + (c & 0x3Fu)); - } - } - mutable BaseIterator m_position; -}; - -} // namespace cutl_details_boost - -#endif // BOOST_REGEX_UNICODE_ITERATOR_HPP - -- cgit v1.1