summary refs log tree commit diff
path: root/libxsd/xsd/cxx
diff options
context:
space:
mode:
Diffstat (limited to 'libxsd/xsd/cxx')
-rw-r--r-- libxsd/xsd/cxx/xml/char-iso8859-1.hxx 72
-rw-r--r-- libxsd/xsd/cxx/xml/char-iso8859-1.txx 101
-rw-r--r-- libxsd/xsd/cxx/xml/char-lcp.hxx 56
-rw-r--r-- libxsd/xsd/cxx/xml/char-lcp.txx 55
-rw-r--r-- libxsd/xsd/cxx/xml/char-utf8.hxx 57
-rw-r--r-- libxsd/xsd/cxx/xml/char-utf8.txx 293
-rw-r--r-- libxsd/xsd/cxx/xml/exceptions.hxx 20
-rw-r--r-- libxsd/xsd/cxx/xml/string.hxx 9
-rw-r--r-- libxsd/xsd/cxx/xml/string.ixx 88
-rw-r--r-- libxsd/xsd/cxx/xml/string.txx 294
10 files changed, 675 insertions, 370 deletions
diff --git a/libxsd/xsd/cxx/xml/char-iso8859-1.hxx b/libxsd/xsd/cxx/xml/char-iso8859-1.hxx
new file mode 100644
index 0000000..38b633f
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-iso8859-1.hxx
@@ -0,0 +1,72 @@
+// file : xsd/cxx/xml/char-iso8859-1.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#ifndef XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER_CHAR_ISO8859_1
+
+#include <string>
+#include <cstddef> // std::size_t
+
+#include <xercesc/util/XercesDefs.hpp> // XMLCh
+
+#include <xsd/cxx/xml/exceptions.hxx> // invalid_utf16_string
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ struct iso8859_1_unrepresentable {};
+
+ // UTF-16 to/from ISO-8859-1 transcoder.
+ //
+ template <typename C>
+ struct char_iso8859_1_transcoder
+ {
+ static std::basic_string<C>
+ to (const XMLCh* s, std::size_t length);
+
+ static XMLCh*
+ from (const C* s, std::size_t length);
+
+ // Get/set a replacement for unrepresentable characters. If set to
+ // 0 (the default value), throw iso8859_1_unrepresentable instead.
+ //
+ static C
+ unrep_char ()
+ {
+ return unrep_char_;
+ }
+
+ static void
+ unrep_char (C c)
+ {
+ unrep_char_ = c;
+ }
+
+ private:
+ static C unrep_char_;
+ };
+
+ typedef char_iso8859_1_transcoder<char> char_transcoder;
+ }
+ }
+}
+
+#include <xsd/cxx/xml/char-iso8859-1.txx>
+
+#else
+# ifndef XSD_CXX_XML_TRANSCODER_CHAR_ISO8859_1
+ //
+ // If you get this error, it usually means that either you compiled
+ // your schemas with different --char-encoding values or you included
+ // some of the libxsd headers (e.g., xsd/cxx/xml/string.hxx) directly
+ // without first including the correct xsd/cxx/xml/char-*.hxx header.
+ //
+# error conflicting character encoding detected
+# endif
+#endif // XSD_CXX_XML_TRANSCODER
diff --git a/libxsd/xsd/cxx/xml/char-iso8859-1.txx b/libxsd/xsd/cxx/xml/char-iso8859-1.txx
new file mode 100644
index 0000000..6b20f01
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-iso8859-1.txx
@@ -0,0 +1,101 @@
+// file : xsd/cxx/xml/char-iso8859-1.txx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#include <xsd/cxx/auto-array.hxx>
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ template <typename C>
+ C char_iso8859_1_transcoder<C>::unrep_char_ = 0;
+
+ template <typename C>
+ std::basic_string<C> char_iso8859_1_transcoder<C>::
+ to (const XMLCh* s, std::size_t len)
+ {
+ const XMLCh* end (s + len);
+
+ // Find what the resulting buffer size will be.
+ //
+ std::size_t rl (0);
+ unsigned int u (0); // Four byte UCS-4 char.
+
+ bool valid (true);
+ const XMLCh* p (s);
+
+ for (; p < end; ++p)
+ {
+ if (*p >= 0xD800 && *p <= 0xDBFF)
+ {
+ // Make sure we have one more char and it has a valid
+ // value for the second char in a surrogate pair.
+ //
+ if (++p == end || !((*p >= 0xDC00) && (*p <= 0xDFFF)))
+ {
+ valid = false;
+ break;
+ }
+ }
+
+ rl++;
+ }
+
+ if (!valid)
+ throw invalid_utf16_string ();
+
+ std::basic_string<C> r;
+ r.reserve (rl + 1);
+ r.resize (rl);
+ C* rs (const_cast<C*> (r.c_str ()));
+ std::size_t i (0);
+
+ p = s;
+
+ // Tight first loop for the common case.
+ //
+ for (; p < end && *p < 0x100; ++p)
+ rs[i++] = C (*p);
+
+ if (p < end && unrep_char_ == 0)
+ throw iso8859_1_unrepresentable ();
+
+ for (; p < end; ++p)
+ {
+ XMLCh x (*p);
+
+ if ((x >= 0xD800) && (x <= 0xDBFF))
+ {
+ u = ((x - 0xD800) << 10) + (*++p - 0xDC00) + 0x10000;
+ }
+ else
+ u = x;
+
+ rs[i++] = u < 0x100 ? C (u) : unrep_char_;
+ }
+
+ return r;
+ }
+
+ template <typename C>
+ XMLCh* char_iso8859_1_transcoder<C>::
+ from (const C* s, std::size_t len)
+ {
+ const C* end (s + len);
+
+ auto_array<XMLCh> r (new XMLCh[len + 1]);
+ XMLCh* ir (r.get ());
+
+ for (const C* p (s); p < end; ++p)
+ *ir++ = static_cast<unsigned char> (*p);
+
+ *ir = XMLCh (0);
+ return r.release ();
+ }
+ }
+ }
+}
diff --git a/libxsd/xsd/cxx/xml/char-lcp.hxx b/libxsd/xsd/cxx/xml/char-lcp.hxx
new file mode 100644
index 0000000..2c41753
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-lcp.hxx
@@ -0,0 +1,56 @@
+// file : xsd/cxx/xml/char-lcp.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#ifndef XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER_CHAR_LCP
+
+#include <string>
+#include <cstddef> // std::size_t
+
+#include <xercesc/util/XercesDefs.hpp> // XMLCh
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ // UTF-16 to/from Xerces-C++ local code page (LCP) transcoder.
+ //
+ // Note that this transcoder has a custom interface due to Xerces-C++
+ // idiosyncrasies. Don't use it as a base for your custom transcoder.
+ //
+ template <typename C>
+ struct char_lcp_transcoder
+ {
+ static std::basic_string<C>
+ to (const XMLCh* s);
+
+ static std::basic_string<C>
+ to (const XMLCh* s, std::size_t length);
+
+ static XMLCh*
+ from (const C* s);
+ };
+
+ typedef char_lcp_transcoder<char> char_transcoder;
+ }
+ }
+}
+
+#include <xsd/cxx/xml/char-lcp.txx>
+
+#else
+# ifndef XSD_CXX_XML_TRANSCODER_CHAR_LCP
+ //
+ // If you get this error, it usually means that either you compiled
+ // your schemas with different --char-encoding values or you included
+ // some of the libxsd headers (e.g., xsd/cxx/xml/string.hxx) directly
+ // without first including the correct xsd/cxx/xml/char-*.hxx header.
+ //
+# error conflicting character encoding detected
+# endif
+#endif // XSD_CXX_XML_TRANSCODER
diff --git a/libxsd/xsd/cxx/xml/char-lcp.txx b/libxsd/xsd/cxx/xml/char-lcp.txx
new file mode 100644
index 0000000..01bb36e
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-lcp.txx
@@ -0,0 +1,55 @@
+// file : xsd/cxx/xml/char-lcp.txx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#include <cstring> // std::memcpy
+
+#include <xercesc/util/XMLString.hpp>
+
+#include <xsd/cxx/auto-array.hxx>
+#include <xsd/cxx/xml/std-memory-manager.hxx>
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ template <typename C>
+ std::basic_string<C> char_lcp_transcoder<C>::
+ to (const XMLCh* s)
+ {
+ std_memory_manager mm;
+ auto_array<C, std_memory_manager> r (
+ xercesc::XMLString::transcode (s, &mm), mm);
+ return std::basic_string<C> (r.get ());
+ }
+
+ template <typename C>
+ std::basic_string<C> char_lcp_transcoder<C>::
+ to (const XMLCh* s, std::size_t len)
+ {
+ auto_array<XMLCh> tmp (new XMLCh[len + 1]);
+ std::memcpy (tmp.get (), s, len * sizeof (XMLCh));
+ tmp[len] = XMLCh (0);
+
+ std_memory_manager mm;
+ auto_array<C, std_memory_manager> r (
+ xercesc::XMLString::transcode (tmp.get (), &mm), mm);
+
+ tmp.reset ();
+
+ return std::basic_string<C> (r.get ());
+ }
+
+ template <typename C>
+ XMLCh* char_lcp_transcoder<C>::
+ from (const C* s)
+ {
+ std_memory_manager mm;
+ return xercesc::XMLString::transcode (s, &mm);
+ }
+ }
+ }
+}
diff --git a/libxsd/xsd/cxx/xml/char-utf8.hxx b/libxsd/xsd/cxx/xml/char-utf8.hxx
new file mode 100644
index 0000000..c255b28
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-utf8.hxx
@@ -0,0 +1,57 @@
+// file : xsd/cxx/xml/char-utf8.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#ifndef XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER_CHAR_UTF8
+
+#include <string>
+#include <cstddef> // std::size_t
+
+#include <xercesc/util/XercesDefs.hpp> // XMLCh
+
+#include <xsd/cxx/xml/exceptions.hxx> // invalid_utf16_string
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ struct invalid_utf8_string {};
+
+ // UTF-16 to/from UTF-8 transcoder.
+ //
+ template <typename C>
+ struct char_utf8_transcoder
+ {
+ static std::basic_string<C>
+ to (const XMLCh* s, std::size_t length);
+
+ static XMLCh*
+ from (const C* s, std::size_t length);
+
+ private:
+ static const unsigned char first_byte_mask_[5];
+ };
+
+ typedef char_utf8_transcoder<char> char_transcoder;
+ }
+ }
+}
+
+#include <xsd/cxx/xml/char-utf8.txx>
+
+#else
+# ifndef XSD_CXX_XML_TRANSCODER_CHAR_UTF8
+ //
+ // If you get this error, it usually means that either you compiled
+ // your schemas with different --char-encoding values or you included
+ // some of the libxsd headers (e.g., xsd/cxx/xml/string.hxx) directly
+ // without first including the correct xsd/cxx/xml/char-*.hxx header.
+ //
+# error conflicting character encoding detected
+# endif
+#endif // XSD_CXX_XML_TRANSCODER
diff --git a/libxsd/xsd/cxx/xml/char-utf8.txx b/libxsd/xsd/cxx/xml/char-utf8.txx
new file mode 100644
index 0000000..96b36a4
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-utf8.txx
@@ -0,0 +1,293 @@
+// file : xsd/cxx/xml/char-utf8.txx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#include <xsd/cxx/auto-array.hxx>
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ template <typename C>
+ const unsigned char char_utf8_transcoder<C>::first_byte_mask_[5] =
+ {
+ 0x00, 0x00, 0xC0, 0xE0, 0xF0
+ };
+
+ template <typename C>
+ std::basic_string<C> char_utf8_transcoder<C>::
+ to (const XMLCh* s, std::size_t len)
+ {
+ const XMLCh* end (s + len);
+
+ // Find what the resulting buffer size will be.
+ //
+ std::size_t rl (0);
+ unsigned int u (0); // Four byte UCS-4 char.
+
+ bool valid (true);
+ const XMLCh* p (s);
+
+ for (; p < end; ++p)
+ {
+ XMLCh x (*p);
+
+ if (x < 0xD800 || x > 0xDBFF)
+ u = x;
+ else
+ {
+ // Make sure we have one more char and it has a valid
+ // value for the second char in a surrogate pair.
+ //
+ if (++p == end || !((*p >= 0xDC00) && (*p <= 0xDFFF)))
+ {
+ valid = false;
+ break;
+ }
+
+ u = ((x - 0xD800) << 10) + (*p - 0xDC00) + 0x10000;
+ }
+
+ if (u < 0x80)
+ rl++;
+ else if (u < 0x800)
+ rl += 2;
+ else if (u < 0x10000)
+ rl += 3;
+ else if (u < 0x110000)
+ rl += 4;
+ else
+ {
+ valid = false;
+ break;
+ }
+ }
+
+ if (!valid)
+ throw invalid_utf16_string ();
+
+ std::basic_string<C> r;
+ r.reserve (rl + 1);
+ r.resize (rl);
+ C* rs (const_cast<C*> (r.c_str ()));
+
+ std::size_t i (0);
+ unsigned int count (0);
+
+ p = s;
+
+ // Tight first loop for the common case.
+ //
+ for (; p < end && *p < 0x80; ++p)
+ rs[i++] = C (*p);
+
+ for (; p < end; ++p)
+ {
+ XMLCh x (*p);
+
+ if ((x >= 0xD800) && (x <= 0xDBFF))
+ {
+ u = ((x - 0xD800) << 10) + (*++p - 0xDC00) + 0x10000;
+ }
+ else
+ u = x;
+
+ if (u < 0x80)
+ count = 1;
+ else if (u < 0x800)
+ count = 2;
+ else if (u < 0x10000)
+ count = 3;
+ else if (u < 0x110000)
+ count = 4;
+
+ switch(count)
+ {
+ case 4:
+ {
+ rs[i + 3] = C ((u | 0x80UL) & 0xBFUL);
+ u >>= 6;
+ }
+ case 3:
+ {
+ rs[i + 2] = C ((u | 0x80UL) & 0xBFUL);
+ u >>= 6;
+ }
+ case 2:
+ {
+ rs[i + 1] = C ((u | 0x80UL) & 0xBFUL);
+ u >>= 6;
+ }
+ case 1:
+ {
+ rs[i] = C (u | first_byte_mask_[count]);
+ }
+ }
+
+ i += count;
+ }
+
+ return r;
+ }
+
+ template <typename C>
+ XMLCh* char_utf8_transcoder<C>::
+ from (const C* s, std::size_t len)
+ {
+ bool valid (true);
+ const C* end (s + len);
+
+ // Find what the resulting buffer size will be.
+ //
+ std::size_t rl (0);
+ unsigned int count (0);
+
+ for (const C* p (s); p < end; ++p)
+ {
+ unsigned char c (*p);
+
+ if (c < 0x80)
+ {
+ // Fast path.
+ //
+ rl += 1;
+ continue;
+ }
+ else if ((c >> 5) == 0x06)
+ count = 2;
+ else if ((c >> 4) == 0x0E)
+ count = 3;
+ else if ((c >> 3) == 0x1E)
+ count = 4;
+ else
+ {
+ valid = false;
+ break;
+ }
+
+ p += count - 1; // One will be added in the for loop
+
+ if (p + 1 > end)
+ {
+ valid = false;
+ break;
+ }
+
+ // A BMP character takes at most 3 bytes in UTF-8; 4 bytes means a surrogate pair.
+ //
+ rl += count > 3 ? 2 : 1;
+ }
+
+ if (!valid)
+ throw invalid_utf8_string ();
+
+ auto_array<XMLCh> r (new XMLCh[rl + 1]);
+ XMLCh* ir (r.get ());
+
+ unsigned int u (0); // Four byte UCS-4 char.
+
+ for (const C* p (s); p < end; ++p)
+ {
+ unsigned char c (*p);
+
+ if (c < 0x80)
+ {
+ // Fast path.
+ //
+ *ir++ = static_cast<XMLCh> (c);
+ continue;
+ }
+ else if ((c >> 5) == 0x06)
+ {
+ // UTF-8: 110yyyyy 10zzzzzz
+ // Unicode: 00000yyy yyzzzzzz
+ //
+ u = (c & 0x1F) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u |= c & 0x3F;
+ }
+ else if ((c >> 4) == 0x0E)
+ {
+ // UTF-8: 1110xxxx 10yyyyyy 10zzzzzz
+ // Unicode: xxxxyyyy yyzzzzzz
+ //
+ u = (c & 0x0F) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u = (u | (c & 0x3F)) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u |= c & 0x3F;
+ }
+ else if ((c >> 3) == 0x1E)
+ {
+ // UTF-8: 11110www 10xxxxxx 10yyyyyy 10zzzzzz
+ // Unicode: 000wwwxx xxxxyyyy yyzzzzzz
+ //
+ u = (c & 0x07) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u = (u | (c & 0x3F)) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u = (u | (c & 0x3F)) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u |= c & 0x3F;
+ }
+
+ if (u & 0xFFFF0000)
+ {
+ // Surrogate pair.
+ //
+ *ir++ = static_cast<XMLCh> (((u - 0x10000) >> 10) + 0xD800);
+ *ir++ = static_cast<XMLCh> ((u & 0x3FF) + 0xDC00);
+ }
+ else
+ *ir++ = static_cast<XMLCh> (u);
+ }
+
+ if (!valid)
+ throw invalid_utf8_string ();
+
+ *ir = XMLCh (0);
+
+ return r.release ();
+ }
+ }
+ }
+}
diff --git a/libxsd/xsd/cxx/xml/exceptions.hxx b/libxsd/xsd/cxx/xml/exceptions.hxx
new file mode 100644
index 0000000..6c2e029
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/exceptions.hxx
@@ -0,0 +1,20 @@
+// file : xsd/cxx/xml/exceptions.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#ifndef XSD_CXX_XML_EXCEPTIONS_HXX
+#define XSD_CXX_XML_EXCEPTIONS_HXX
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ struct invalid_utf16_string {};
+ }
+ }
+}
+
+#endif // XSD_CXX_XML_EXCEPTIONS_HXX
diff --git a/libxsd/xsd/cxx/xml/string.hxx b/libxsd/xsd/cxx/xml/string.hxx
index 2d08134..ec666ee 100644
--- a/libxsd/xsd/cxx/xml/string.hxx
+++ b/libxsd/xsd/cxx/xml/string.hxx
@@ -7,6 +7,7 @@
#define XSD_CXX_XML_STRING_HXX
#include <string>
+#include <cstddef> // std::size_t
#include <xsd/cxx/auto-array.hxx>
#include <xercesc/util/XercesDefs.hpp> // XMLCh
@@ -17,12 +18,6 @@ namespace xsd
{
namespace xml
{
- //
- //
- struct invalid_utf8_string {};
- struct invalid_utf16_string {};
-
-
// Transcode a null-terminated string.
//
template <typename C>
@@ -84,7 +79,7 @@ namespace xsd
}
}
-#endif // XSD_CXX_XML_STRING_HXX
+#endif // XSD_CXX_XML_STRING_HXX
#include <xsd/cxx/xml/string.ixx>
#include <xsd/cxx/xml/string.txx>
diff --git a/libxsd/xsd/cxx/xml/string.ixx b/libxsd/xsd/cxx/xml/string.ixx
index bde86d8..056a15f 100644
--- a/libxsd/xsd/cxx/xml/string.ixx
+++ b/libxsd/xsd/cxx/xml/string.ixx
@@ -6,11 +6,13 @@
#ifndef XSD_CXX_XML_STRING_IXX
#define XSD_CXX_XML_STRING_IXX
-#include <cassert>
-#include <cstring> // std::memcpy
-
#include <xercesc/util/XMLString.hpp>
-#include <xsd/cxx/xml/std-memory-manager.hxx>
+
+// If no transcoder has been included, use the default UTF-8.
+//
+#ifndef XSD_CXX_XML_TRANSCODER
+# include <xsd/cxx/xml/char-utf8.hxx>
+#endif
// We sometimes need this functionality even if we are building for
// wchar_t.
@@ -21,43 +23,17 @@ namespace xsd
{
namespace xml
{
-#ifndef XSD_USE_LCP
- namespace bits
- {
- // UTF-16 to/from UTF-8 transcoder.
- //
- template <typename C>
- struct char_transcoder
- {
- static std::basic_string<C>
- to (const XMLCh* s, std::size_t length);
-
- static XMLCh*
- from (const C* s, std::size_t length);
-
- private:
- static const unsigned char first_byte_mask_[5];
- };
- }
-#endif
-
template <>
inline std::basic_string<char>
transcode<char> (const XMLCh* s)
{
- if (s == 0)
+ if (s == 0 || *s == XMLCh (0))
return std::basic_string<char> ();
-#ifndef XSD_USE_LCP
- return bits::char_transcoder<char>::to (
- s, xercesc::XMLString::stringLen (s));
+#ifndef XSD_CXX_XML_TRANSCODER_CHAR_LCP
+ return char_transcoder::to (s, xercesc::XMLString::stringLen (s));
#else
- // Use Xerces-C++ local code page transcoding.
- //
- std_memory_manager mm;
- auto_array<char, std_memory_manager> r (
- xercesc::XMLString::transcode (s, &mm), mm);
- return std::basic_string<char> (r.get ());
+ return char_transcoder::to (s);
#endif
}
@@ -68,41 +44,17 @@ namespace xsd
if (s == 0 || len == 0)
return std::basic_string<char> ();
-#ifndef XSD_USE_LCP
- // Convert UTF-16 to UTF-8
- //
- return bits::char_transcoder<char>::to (s, len);
-#else
- // Use Xerces-C++ local code page transcoding.
- //
- auto_array<XMLCh> tmp (new XMLCh[len + 1]);
- std::memcpy (tmp.get (), s, len * sizeof (XMLCh));
- tmp[len] = XMLCh (0);
-
- std_memory_manager mm;
- auto_array<char, std_memory_manager> r (
- xercesc::XMLString::transcode (tmp.get (), &mm), mm);
-
- tmp.reset ();
-
- return std::basic_string<char> (r.get ());
-#endif
+ return char_transcoder::to (s, len);
}
template <>
inline XMLCh*
transcode_to_xmlch (const char* s)
{
-#ifndef XSD_USE_LCP
- // Convert UTF-8 to UTF-16
- //
- return bits::char_transcoder<char>::from (
- s, std::char_traits<char>::length (s));
+#ifndef XSD_CXX_XML_TRANSCODER_CHAR_LCP
+ return char_transcoder::from (s, std::char_traits<char>::length (s));
#else
- // Use Xerces-C++ local code page transcoding.
- //
- std_memory_manager mm;
- return xercesc::XMLString::transcode (s, &mm);
+ return char_transcoder::from (s);
#endif
}
@@ -110,16 +62,10 @@ namespace xsd
inline XMLCh*
transcode_to_xmlch (const std::basic_string<char>& s)
{
-#ifndef XSD_USE_LCP
- // Convert UTF-8 to UTF-16
- //
- return bits::char_transcoder<char>::from (
- s.c_str (), s.length ());
+#ifndef XSD_CXX_XML_TRANSCODER_CHAR_LCP
+ return char_transcoder::from (s.c_str (), s.length ());
#else
- // Use Xerces-C++ local code page transcoding.
- //
- std_memory_manager mm;
- return xercesc::XMLString::transcode (s.c_str (), &mm);
+ return char_transcoder::from (s.c_str ());
#endif
}
}
diff --git a/libxsd/xsd/cxx/xml/string.txx b/libxsd/xsd/cxx/xml/string.txx
index cdef87e..f71480e 100644
--- a/libxsd/xsd/cxx/xml/string.txx
+++ b/libxsd/xsd/cxx/xml/string.txx
@@ -6,306 +6,16 @@
#ifndef XSD_CXX_XML_STRING_TXX
#define XSD_CXX_XML_STRING_TXX
-#ifndef XSD_USE_LCP
-namespace xsd
-{
- namespace cxx
- {
- namespace xml
- {
- namespace bits
- {
- template <typename C>
- const unsigned char char_transcoder<C>::first_byte_mask_[5] =
- {
- 0x00, 0x00, 0xC0, 0xE0, 0xF0
- };
-
- template <typename C>
- std::basic_string<C> char_transcoder<C>::
- to (const XMLCh* s, std::size_t len)
- {
- const XMLCh* end (s + len);
-
- // Find what the resulting buffer size will be.
- //
- std::size_t rl (0);
- unsigned int u (0); // Four byte UCS-4 char.
-
- bool valid (true);
- const XMLCh* p (s);
- for (; p < end; ++p)
- {
- XMLCh x (*p);
-
- if (x < 0xD800 || x > 0xDBFF)
- u = x;
- else
- {
- // Make sure we have one more char and it has a valid
- // value for the second char in a surrogate pair.
- //
- if (++p == end || !((*p >= 0xDC00) && (*p <= 0xDFFF)))
- {
- valid = false;
- break;
- }
-
- u = ((x - 0xD800) << 10) + (*p - 0xDC00) + 0x10000;
- }
-
- if (u < 0x80)
- rl++;
- else if (u < 0x800)
- rl += 2;
- else if (u < 0x10000)
- rl += 3;
- else if (u < 0x110000)
- rl += 4;
- else
- {
- valid = false;
- break;
- }
- }
-
- if (!valid)
- throw invalid_utf16_string ();
-
- std::basic_string<C> r;
- r.reserve (rl + 1);
- r.resize (rl);
- C* rs (const_cast<C*> (r.c_str ()));
-
- std::size_t i (0);
- unsigned int count (0);
-
- p = s;
-
- // Tight first loop for the common case.
- //
- for (; p < end && *p < 0x80; ++p)
- rs[i++] = C (*p);
-
- for (; p < end; ++p)
- {
- XMLCh x (*p);
-
- if ((x >= 0xD800) && (x <= 0xDBFF))
- {
- u = ((x - 0xD800) << 10) + (*++p - 0xDC00) + 0x10000;
- }
- else
- u = x;
-
- if (u < 0x80)
- count = 1;
- else if (u < 0x800)
- count = 2;
- else if (u < 0x10000)
- count = 3;
- else if (u < 0x110000)
- count = 4;
-
- switch(count)
- {
- case 4:
- {
- rs[i + 3] = C ((u | 0x80UL) & 0xBFUL);
- u >>= 6;
- }
- case 3:
- {
- rs[i + 2] = C ((u | 0x80UL) & 0xBFUL);
- u >>= 6;
- }
- case 2:
- {
- rs[i + 1] = C ((u | 0x80UL) & 0xBFUL);
- u >>= 6;
- }
- case 1:
- {
- rs[i] = C (u | first_byte_mask_[count]);
- }
- }
-
- i += count;
- }
-
- return r;
- }
-
- template <typename C>
- XMLCh* char_transcoder<C>::
- from (const C* s, std::size_t len)
- {
- bool valid (true);
- const C* end (s + len);
-
- // Find what the resulting buffer size will be.
- //
- std::size_t rl (0);
- unsigned int count (0);
-
- for (const C* p (s); p < end; ++p)
- {
- unsigned char c (*p);
-
- if (c < 0x80)
- {
- // Fast path.
- //
- rl += 1;
- continue;
- }
- else if ((c >> 5) == 0x06)
- count = 2;
- else if ((c >> 4) == 0x0E)
- count = 3;
- else if ((c >> 3) == 0x1E)
- count = 4;
- else
- {
- valid = false;
- break;
- }
-
- p += count - 1; // One will be added in the for loop
-
- if (p + 1 > end)
- {
- valid = false;
- break;
- }
-
- // BMP is represented by up to 3 code points in UTF-8.
- //
- rl += count > 3 ? 2 : 1;
- }
-
- if (!valid)
- throw invalid_utf8_string ();
-
- auto_array<XMLCh> r (new XMLCh[rl + 1]);
- XMLCh* ir (r.get ());
-
- unsigned int u (0); // Four byte UCS-4 char.
-
- for (const C* p (s); p < end; ++p)
- {
- unsigned char c (*p);
-
- if (c < 0x80)
- {
- // Fast path.
- //
- *ir++ = static_cast<XMLCh> (c);
- continue;
- }
- else if ((c >> 5) == 0x06)
- {
- // UTF-8: 110yyyyy 10zzzzzz
- // Unicode: 00000yyy yyzzzzzz
- //
- u = (c & 0x1F) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u |= c & 0x3F;
- }
- else if ((c >> 4) == 0x0E)
- {
- // UTF-8: 1110xxxx 10yyyyyy 10zzzzzz
- // Unicode: xxxxyyyy yyzzzzzz
- //
- u = (c & 0x0F) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u = (u | (c & 0x3F)) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u |= c & 0x3F;
- }
- else if ((c >> 3) == 0x1E)
- {
- // UTF-8: 000wwwxx xxxxyyyy yyzzzzzz
- // Unicode: 11110www 10xxxxxx 10yyyyyy 10zzzzzz
- //
- u = (c & 0x07) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u = (u | (c & 0x3F)) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u = (u | (c & 0x3F)) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u |= c & 0x3F;
- }
-
- if (u & 0xFFFF0000)
- {
- // Surrogate pair.
- //
- *ir++ = static_cast<XMLCh> (((u - 0x10000) >> 10) + 0xD800);
- *ir++ = static_cast<XMLCh> ((u & 0x3FF) + 0xDC00);
- }
- else
- *ir++ = static_cast<XMLCh> (u);
- }
-
- if (!valid)
- throw invalid_utf8_string ();
-
- *ir = XMLCh (0);
-
- return r.release ();
- }
- }
- }
- }
-}
-
-#endif // XSD_USE_LCP
#endif // XSD_CXX_XML_STRING_TXX
-
#if defined(XSD_USE_WCHAR) || !defined(XSD_USE_CHAR)
#ifndef XSD_CXX_XML_STRING_TXX_WCHAR
#define XSD_CXX_XML_STRING_TXX_WCHAR
+#include <xsd/cxx/xml/exceptions.hxx>
+
namespace xsd
{
namespace cxx