aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2009-12-08 16:18:01 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2009-12-08 16:18:01 +0200
commit1ca6396a3dd284241de11bcaa210ad5836e8e5a8 (patch)
tree465c19f0d668a91bb556d748911847acfb80cb09
parentd71611d5fb575078bdf573c35257bb86bb7054e0 (diff)
Multiple object model character encodings support
Also add support for ISO-8859-1.
-rw-r--r--NEWS9
-rw-r--r--documentation/custom-literals.xsd49
-rw-r--r--documentation/cxx/parser/guide/index.xhtml40
-rw-r--r--documentation/cxx/tree/guide/index.xhtml25
-rw-r--r--documentation/cxx/tree/manual/index.xhtml18
-rw-r--r--documentation/makefile2
-rw-r--r--documentation/xsd.139
-rw-r--r--documentation/xsd.xhtml27
-rw-r--r--libxsd/xsd/cxx/xml/char-iso8859-1.hxx72
-rw-r--r--libxsd/xsd/cxx/xml/char-iso8859-1.txx101
-rw-r--r--libxsd/xsd/cxx/xml/char-lcp.hxx56
-rw-r--r--libxsd/xsd/cxx/xml/char-lcp.txx55
-rw-r--r--libxsd/xsd/cxx/xml/char-utf8.hxx57
-rw-r--r--libxsd/xsd/cxx/xml/char-utf8.txx293
-rw-r--r--libxsd/xsd/cxx/xml/exceptions.hxx20
-rw-r--r--libxsd/xsd/cxx/xml/string.hxx9
-rw-r--r--libxsd/xsd/cxx/xml/string.ixx88
-rw-r--r--libxsd/xsd/cxx/xml/string.txx294
-rw-r--r--tests/cxx/tree/encoding/char/iso-8859-1/driver.cxx76
-rw-r--r--tests/cxx/tree/encoding/char/iso-8859-1/makefile83
-rw-r--r--tests/cxx/tree/encoding/char/iso-8859-1/test.std18
-rw-r--r--tests/cxx/tree/encoding/char/iso-8859-1/test.xml14
-rw-r--r--tests/cxx/tree/encoding/char/iso-8859-1/test.xsd31
-rw-r--r--tests/cxx/tree/encoding/char/lcp/driver.cxx2
-rw-r--r--tests/cxx/tree/encoding/char/lcp/makefile4
-rw-r--r--tests/cxx/tree/encoding/char/makefile2
-rw-r--r--tests/cxx/tree/encoding/char/utf-8/makefile2
-rw-r--r--xsd/cxx/elements.cxx275
-rw-r--r--xsd/cxx/elements.hxx39
-rw-r--r--xsd/cxx/literal-map.cxx296
-rw-r--r--xsd/cxx/literal-map.hxx23
-rw-r--r--xsd/cxx/parser/cli.hxx2
-rw-r--r--xsd/cxx/parser/elements.cxx3
-rw-r--r--xsd/cxx/parser/elements.hxx1
-rw-r--r--xsd/cxx/parser/generator.cxx82
-rw-r--r--xsd/cxx/parser/generator.hxx2
-rw-r--r--xsd/cxx/parser/name-processor.cxx16
-rw-r--r--xsd/cxx/parser/name-processor.hxx6
-rw-r--r--xsd/cxx/parser/parser-header.cxx7
-rw-r--r--xsd/cxx/parser/validator.cxx17
-rw-r--r--xsd/cxx/tree/cli.hxx2
-rw-r--r--xsd/cxx/tree/counter.cxx2
-rw-r--r--xsd/cxx/tree/elements.cxx3
-rw-r--r--xsd/cxx/tree/elements.hxx1
-rw-r--r--xsd/cxx/tree/generator.cxx73
-rw-r--r--xsd/cxx/tree/generator.hxx2
-rw-r--r--xsd/cxx/tree/name-processor.cxx15
-rw-r--r--xsd/cxx/tree/name-processor.hxx9
-rw-r--r--xsd/cxx/tree/tree-forward.cxx7
-rw-r--r--xsd/cxx/tree/tree-header.cxx14
-rw-r--r--xsd/cxx/tree/validator.cxx1
-rw-r--r--xsd/elements.hxx1
-rw-r--r--xsd/makefile20
-rw-r--r--xsd/xsd.cxx70
54 files changed, 2013 insertions, 462 deletions
diff --git a/NEWS b/NEWS
index 9472972..dfbb46d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,14 @@
Version 3.3.0
+ * New option, --char-encoding, allows to specify the character encoding
+ that should be used in the object model. Valid values for the 'char'
+ character type are 'utf8' (default), 'iso8859-1' (new), 'lcp' (Xerces-C++
+ local code page), and 'custom' (allows to support custom encodings). Note
+ that if you use a non-default character encoding and include some libxsd
+ headers (e.g., xsd/cxx/xml/string.hxx) directly, then you will need to
+ first include the correct xsd/cxx/xml/char-<enc>.hxx header, where <enc>
+ is iso8859-1, lcp, etc. This mechanism replaces the XSD_USE_LCP macro.
+
* When the XSD compiler is built with Xerces-C++ 3.1.0 or later, enable
handling of multiple imports for the same namespace. Before all
subsequent imports for a namespace were ignored which caused errors
diff --git a/documentation/custom-literals.xsd b/documentation/custom-literals.xsd
new file mode 100644
index 0000000..ab2d649
--- /dev/null
+++ b/documentation/custom-literals.xsd
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+
+<!--
+
+file : documentation/custom-literals.xsd
+author : Boris Kolpackov <boris@codesynthesis.com>
+copyright : not copyrighted - public domain
+
+This schema describes the XML format used to provide the custom string
+to C++ string literal mapping with the --custom-literals XSD compiler
+command line option. Here is a sample instance:
+
+<string-literal-map>
+ <entry>
+ <string>hello</string>
+ <literal>"hello"</literal>
+ </entry>
+ <entry>
+ <string>greeting</string>
+ <literal>"greeting"</literal>
+ </entry>
+</string-literal-map>
+
+-->
+
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+
+ <xsd:simpleType name="literal_t">
+ <xsd:restriction base="xsd:string">
+ <xsd:pattern value='".+"'/>
+ </xsd:restriction>
+ </xsd:simpleType>
+
+ <xsd:complexType name="entry_t">
+ <xsd:sequence>
+ <xsd:element name="string" type="xsd:string"/>
+ <xsd:element name="literal" type="literal_t"/>
+ </xsd:sequence>
+ </xsd:complexType>
+
+ <xsd:complexType name="string_literal_map_t">
+ <xsd:sequence>
+ <xsd:element name="entry" type="entry_t" maxOccurs="unbounded"/>
+ </xsd:sequence>
+ </xsd:complexType>
+
+ <xsd:element name="string-literal-map" type="string_literal_map_t"/>
+
+</xsd:schema>
diff --git a/documentation/cxx/parser/guide/index.xhtml b/documentation/cxx/parser/guide/index.xhtml
index 7379c96..9653e37 100644
--- a/documentation/cxx/parser/guide/index.xhtml
+++ b/documentation/cxx/parser/guide/index.xhtml
@@ -280,7 +280,7 @@
<tr>
<th>5</th><td><a href="#5">Mapping Configuration</a>
<table class="toc">
- <tr><th>5.1</th><td><a href="#5.1">Character Type</a></td></tr>
+ <tr><th>5.1</th><td><a href="#5.1">Character Type and Encoding</a></td></tr>
<tr><th>5.2</th><td><a href="#5.2">Underlying XML Parser</a></td></tr>
<tr><th>5.3</th><td><a href="#5.3">XML Schema Validation</a></td></tr>
<tr><th>5.4</th><td><a href="#5.4">Support for Polymorphism</a></td></tr>
@@ -1615,8 +1615,8 @@ namespace http://www.example.com/xmlns/my
following map files. The string-based XML Schema types are
mapped to either <code>std::string</code> or
<code>std::wstring</code> depending on the character type
- selected (see <a href="#5.1"> Section 5.1, "Character Type"</a> for
- more information).</p>
+ selected (see <a href="#5.1"> Section 5.1, "Character Type and
+ Encoding"</a> for more information).</p>
<pre class="type-map">
namespace http://www.w3.org/2001/XMLSchema
@@ -1909,7 +1909,7 @@ age: 28
Compiler Command Line Manual</a>.
</p>
- <h2><a name="5.1">5.1 Character Type</a></h2>
+ <h2><a name="5.1">5.1 Character Type and Encoding</a></h2>
<p>The C++/Parser mapping has built-in support for two character types:
<code>char</code> and <code>wchar_t</code>. You can select the
@@ -1921,15 +1921,24 @@ age: 28
<p>Another aspect of the mapping that depends on the character type
is character encoding. For the <code>char</code> character type
- the encoding is UTF-8. For the <code>wchar_t</code> character type
- the encoding is automatically selected between UTF-16 and
- UTF-32/UCS-4 depending on the size of the <code>wchar_t</code> type.
- On some platforms (for example, Windows with Visual C++ and AIX with IBM XL
- C++) <code>wchar_t</code> is 2 bytes long. For these platforms the
+ the default encoding is UTF-8. Other supported encodings are
+ ISO-8859-1, Xerces-C++ Local Code Page (LCP), as well as
+ custom encodings. You can select which encoding should be used
+ in the object model with the <code>--char-encoding</code> command
+ line option.</p>
+
+ <p>For the <code>wchar_t</code> character type the encoding is
+ automatically selected between UTF-16 and UTF-32/UCS-4 depending
+ on the size of the <code>wchar_t</code> type. On some platforms
+ (for example, Windows with Visual C++ and AIX with IBM XL C++)
+ <code>wchar_t</code> is 2 bytes long. For these platforms the
encoding is UTF-16. On other platforms <code>wchar_t</code> is 4 bytes
- long and UTF-32/UCS-4 is used.
- </p>
+ long and UTF-32/UCS-4 is used.</p>
+ <p>Note also that the character encoding that is used in the object model
+ is independent of the encodings used in input and output XML. In fact,
+ all three (object model, input XML, and output XML) can have different
+ encodings.</p>
<h2><a name="5.2">5.2 Underlying XML Parser</a></h2>
@@ -3306,7 +3315,7 @@ namespace xml_schema
<code>document</code> type has the following interface. Note that
if the character type is <code>wchar_t</code>, then the string type
in the interface becomes <code>std::wstring</code>
- (see <a href="#5.1">Section 5.1, "Character Type"</a>).</p>
+ (see <a href="#5.1">Section 5.1, "Character Type and Encoding"</a>).</p>
<pre class="c++">
namespace xml_schema
@@ -3601,7 +3610,7 @@ namespace xml_schema
<code>document</code> type has the following interface. Note that
if the character type is <code>wchar_t</code>, then the string type
in the interface becomes <code>std::wstring</code>
- (see <a href="#5.1">Section 5.1, "Character Type"</a>).</p>
+ (see <a href="#5.1">Section 5.1, "Character Type and Encoding"</a>).</p>
<pre class="c++">
namespace xml_schema
@@ -3886,7 +3895,8 @@ main (int argc, char* argv[])
character type is <code>wchar_t</code>, then the string type
and output stream type in the definition become
<code>std::wstring</code> and <code>std::wostream</code>,
- respectively (see <a href="#5.1">Section 5.1, "Character Type"</a>).</p>
+ respectively (see <a href="#5.1">Section 5.1, "Character Type
+ and Encoding"</a>).</p>
<pre class="c++">
namespace xml_schema
@@ -3998,7 +4008,7 @@ main (int argc, char* argv[])
listing presents the definition of the <code>error_handler</code>
interface. Note that if the character type is <code>wchar_t</code>,
then the string type in the interface becomes <code>std::wstring</code>
- (see <a href="#5.1">Section 5.1, "Character Type"</a>).</p>
+ (see <a href="#5.1">Section 5.1, "Character Type and Encoding"</a>).</p>
<pre class="c++">
namespace xml_schema
diff --git a/documentation/cxx/tree/guide/index.xhtml b/documentation/cxx/tree/guide/index.xhtml
index 787610a..f96b09b 100644
--- a/documentation/cxx/tree/guide/index.xhtml
+++ b/documentation/cxx/tree/guide/index.xhtml
@@ -226,7 +226,7 @@
<tr>
<th>3</th><td><a href="#3">Overall Mapping Configuration</a>
<table class="toc">
- <tr><th>3.1</th><td><a href="#3.1">Character Type</a></td></tr>
+ <tr><th>3.1</th><td><a href="#3.1">Character Type and Encoding</a></td></tr>
<tr><th>3.2</th><td><a href="#3.2">Support for Polymorphism </a></td></tr>
<tr><th>3.3</th><td><a href="#3.3">Namespace Mapping</a></td></tr>
<tr><th>3.4</th><td><a href="#3.4">Thread Safety</a></td></tr>
@@ -1148,7 +1148,7 @@ $ doxygen hello.doxygen
Compiler Command Line Manual</a>.
</p>
- <h2><a name="3.1">3.1 Character Type</a></h2>
+ <h2><a name="3.1">3.1 Character Type and Encoding</a></h2>
<p>The C++/Tree mapping has built-in support for two character types:
<code>char</code> and <code>wchar_t</code>. You can select the
@@ -1160,14 +1160,25 @@ $ doxygen hello.doxygen
<p>Another aspect of the mapping that depends on the character type
is character encoding. For the <code>char</code> character type
- the encoding is UTF-8. For the <code>wchar_t</code> character type
- the encoding is automatically selected between UTF-16 and
- UTF-32/UCS-4 depending on the size of the <code>wchar_t</code> type.
- On some platforms (for example, Windows with Visual C++ and AIX with IBM XL
- C++) <code>wchar_t</code> is 2 bytes long. For these platforms the
+ the default encoding is UTF-8. Other supported encodings are
+ ISO-8859-1, Xerces-C++ Local Code Page (LCP), as well as
+ custom encodings. You can select which encoding should be used
+ in the object model with the <code>--char-encoding</code> command
+ line option.</p>
+
+ <p>For the <code>wchar_t</code> character type the encoding is
+ automatically selected between UTF-16 and UTF-32/UCS-4 depending
+ on the size of the <code>wchar_t</code> type. On some platforms
+ (for example, Windows with Visual C++ and AIX with IBM XL C++)
+ <code>wchar_t</code> is 2 bytes long. For these platforms the
encoding is UTF-16. On other platforms <code>wchar_t</code> is 4 bytes
long and UTF-32/UCS-4 is used.</p>
+ <p>Note also that the character encoding that is used in the object model
+ is independent of the encodings used in input and output XML. In fact,
+ all three (object model, input XML, and output XML) can have different
+ encodings.</p>
+
<h2><a name="3.2">3.2 Support for Polymorphism</a></h2>
<p>By default XSD generates non-polymorphic code. If your vocabulary
diff --git a/documentation/cxx/tree/manual/index.xhtml b/documentation/cxx/tree/manual/index.xhtml
index d468fe3..91c6154 100644
--- a/documentation/cxx/tree/manual/index.xhtml
+++ b/documentation/cxx/tree/manual/index.xhtml
@@ -226,7 +226,7 @@
<th>2.1</th><td><a href="#2.1">Preliminary Information</a>
<table class="toc">
<tr><th>2.1.1</th><td><a href="#2.1.1">Identifiers</a></td></tr>
- <tr><th>2.1.2</th><td><a href="#2.1.2">Character Type</a></td></tr>
+ <tr><th>2.1.2</th><td><a href="#2.1.2">Character Type and Encoding</a></td></tr>
<tr><th>2.1.3</th><td><a href="#2.1.3">XML Schema Namespace</a></td></tr>
<tr><th>2.1.4</th><td><a href="#2.1.4">Anonymous Types</a></td></tr>
</table>
@@ -567,7 +567,7 @@
CONVENTION section in the <a href="http://www.codesynthesis.com/projects/xsd/documentation/xsd.xhtml">XSD
Compiler Command Line Manual</a>.</p>
- <h3><a name="2.1.2">2.1.2 Character Type</a></h3>
+ <h3><a name="2.1.2">2.1.2 Character Type and Encoding</a></h3>
<p>The code that implements the mapping, depending on the
<code>--char-type</code> option, is generated using either
@@ -577,6 +577,20 @@
your schemas, for example <code>std::basic_string&lt;C></code>.
</p>
+ <p>Another aspect of the mapping that depends on the character type
+ is character encoding. For the <code>char</code> character type
+ the default encoding is UTF-8. Other supported encodings are
+ ISO-8859-1, Xerces-C++ Local Code Page (LCP), as well as
+ custom encodings. The encoding can be selected with the
+ <code>--char-encoding</code> command line option.</p>
+
+ <p>For the <code>wchar_t</code> character type the encoding is
+ automatically selected between UTF-16 and UTF-32/UCS-4 depending
+ on the size of the <code>wchar_t</code> type. On some platforms
+ (for example, Windows with Visual C++ and AIX with IBM XL C++)
+ <code>wchar_t</code> is 2 bytes long. For these platforms the
+ encoding is UTF-16. On other platforms <code>wchar_t</code> is 4 bytes
+ long and UTF-32/UCS-4 is used.</p>
<h3><a name="2.1.3">2.1.3 XML Schema Namespace</a></h3>
diff --git a/documentation/makefile b/documentation/makefile
index 0638928..81a26fe 100644
--- a/documentation/makefile
+++ b/documentation/makefile
@@ -20,6 +20,7 @@ $(install): $(out_base)/cxx/.install
$(call install-data,$(src_base)/future.xhtml,$(install_doc_dir)/xsd/future.xhtml)
$(call install-data,$(src_base)/schema-authoring-guide.xhtml,$(install_doc_dir)/xsd/schema-authoring-guide.xhtml)
$(call install-data,$(src_base)/xsd.xhtml,$(install_doc_dir)/xsd/xsd.xhtml)
+ $(call install-data,$(src_base)/custom-literals.xsd,$(install_doc_dir)/xsd/custom-literals.xsd)
$(call install-data,$(src_base)/xsd.1,$(install_man_dir)/man1/xsd.1)
# Dist.
@@ -32,6 +33,7 @@ $(dist-common):
$(call install-data,$(src_base)/xsd.1,$(dist_prefix)/documentation/xsd.1)
$(call install-data,$(src_base)/future.xhtml,$(dist_prefix)/documentation/future.xhtml)
$(call install-data,$(src_base)/schema-authoring-guide.xhtml,$(dist_prefix)/documentation/schema-authoring-guide.xhtml)
+ $(call install-data,$(src_base)/custom-literals.xsd,$(dist_prefix)/documentation/custom-literals.xsd)
$(dist): $(dist-common) $(out_base)/cxx/.dist
$(dist-win): $(dist-common) $(out_base)/cxx/.dist-win
diff --git a/documentation/xsd.1 b/documentation/xsd.1
index b84586d..1038d50 100644
--- a/documentation/xsd.1
+++ b/documentation/xsd.1
@@ -127,6 +127,34 @@ Valid values are
and
.BR wchar_t .
.
+.IP "\fB\--char-encoding \fIenc\fR"
+Specify the character encoding that should be used in the object model.
+Valid values for the
+.B char
+character type are
+.B utf8
+(default),
+.BR iso8859-1 , lcp
+(Xerces-C++ local code page),
+and
+.BR custom .
+If you pass
+.B custom
+as the value then you will need to include the transcoder implementation
+header for your encoding at the beginning of the generated header files
+(see the
+.B --hxx-prologue
+option).
+
+For the
+.B wchar_t
+character type the only valid value is
+.B auto
+and the encoding is automatically selected between UTF-16 and UTF-32/UCS-4,
+depending on the
+.B wchar_t
+type size.
+.
.IP "\fB\--output-dir \fIdir\fR"
Write generated files to
.I dir
@@ -450,6 +478,17 @@ in places where DLL export/import control statements (
.BR __declspec(dllexport/dllimport) )
are necessary.
+.IP "\fB\--custom-literals \fIfile\fR"
+Load custom XML string to C++ literal mappings from
+.IR file .
+This mechanism can be useful if you are using a custom character encoding
+and some of the strings in your schemas, for example element/attribute
+names or enumeration values, contain non-ASCII characters. In this case
+you will need to provide a custom mapping to C++ literals for such
+strings. The format of this file is specified in the
+.B custom-literals.xsd
+XML Schema file that can be found in the documentation directory.
+
.IP "\fB\--export-xml-schema\fR"
Export/import types in the XML Schema namespace using the export
symbol provided with the
diff --git a/documentation/xsd.xhtml b/documentation/xsd.xhtml
index 49d6503..da2b52c 100644
--- a/documentation/xsd.xhtml
+++ b/documentation/xsd.xhtml
@@ -125,6 +125,21 @@
instead of the default <code><b>char</b></code>. Valid values
are <code><b>char</b></code> and <code><b>wchar_t</b></code>.</dd>
+ <dt><code><b>--char-encoding</b> <i>enc</i></code></dt>
+ <dd>Specify the character encoding that should be used in the object
+ model. Valid values for the <code><b>char</b></code> character type
+ are <code><b>utf8</b></code> (default), <code><b>iso8859-1</b></code>,
+ <code><b>lcp</b></code> (Xerces-C++ local code page), and
+ <code><b>custom</b></code>. If you pass <code><b>custom</b></code> as
+ the value then you will need to include the transcoder implementation
+ header for your encoding at the beginning of the generated header
+ files (see the <code><b>--hxx-prologue</b></code> option).
+
+ <p>For the <code><b>wchar_t</b></code> character type the only valid
+ value is <code><b>auto</b></code> and the encoding is automatically
+ selected between UTF-16 and UTF-32/UCS-4, depending on the
+ <code><b>wchar_t</b></code> type size.</p></dd>
+
<dt><code><b>--output-dir</b> <i>dir</i></code></dt>
<dd>Write generated files to <code><i>dir</i></code> instead of
the current directory.</dd>
@@ -393,6 +408,18 @@
generated file for which there is no file-specific epilogue file.
</dd>
+ <dt><code><b>--custom-literals</b> <i>file</i></code></dt>
+ <dd>Load custom XML string to C++ literal mappings from
+ <code><i>file</i></code>. This mechanism can be useful if you
+ are using a custom character encoding and some of the strings
+ in your schemas, for example element/attribute names or enumeration
+ values, contain non-ASCII characters. In this case you will need
+ to provide a custom mapping to C++ literals for such
+ strings. The format of this file is specified in the
+ <code><b>custom-literals.xsd</b></code> XML Schema file that
+ can be found in the documentation directory.
+ </dd>
+
<dt><code><b>--export-symbol</b> <i>symbol</i></code></dt>
<dd>Insert <code><i>symbol</i></code> in places where DLL
export/import control statements
diff --git a/libxsd/xsd/cxx/xml/char-iso8859-1.hxx b/libxsd/xsd/cxx/xml/char-iso8859-1.hxx
new file mode 100644
index 0000000..38b633f
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-iso8859-1.hxx
@@ -0,0 +1,72 @@
+// file : xsd/cxx/xml/char-iso8859-1.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#ifndef XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER_CHAR_ISO8859_1
+
+#include <string>
+#include <cstddef> // std::size_t
+
+#include <xercesc/util/XercesDefs.hpp> // XMLCh
+
+#include <xsd/cxx/xml/exceptions.hxx> // invalid_utf16_string
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ struct iso8859_1_unrepresentable {};
+
+ // UTF-16 to/from ISO-8859-1 transcoder.
+ //
+ template <typename C>
+ struct char_iso8859_1_transcoder
+ {
+ static std::basic_string<C>
+ to (const XMLCh* s, std::size_t length);
+
+ static XMLCh*
+ from (const C* s, std::size_t length);
+
+ // Get/set a replacement for unrepresentable characters. If set to
+ // 0 (the default value), throw iso8859_1_unrepresentable instead.
+ //
+ static C
+ unrep_char ()
+ {
+ return unrep_char_;
+ }
+
+ static void
+ unrep_char (C c)
+ {
+ unrep_char_ = c;
+ }
+
+ private:
+ static C unrep_char_;
+ };
+
+ typedef char_iso8859_1_transcoder<char> char_transcoder;
+ }
+ }
+}
+
+#include <xsd/cxx/xml/char-iso8859-1.txx>
+
+#else
+# ifndef XSD_CXX_XML_TRANSCODER_CHAR_ISO8859_1
+ //
+ // If you get this error, it usually means that either you compiled
+ // your schemas with different --char-encoding values or you included
+ // some of the libxsd headers (e.g., xsd/cxx/xml/string.hxx) directly
+ // without first including the correct xsd/cxx/xml/char-*.hxx header.
+ //
+# error conflicting character encoding detected
+# endif
+#endif // XSD_CXX_XML_TRANSCODER
diff --git a/libxsd/xsd/cxx/xml/char-iso8859-1.txx b/libxsd/xsd/cxx/xml/char-iso8859-1.txx
new file mode 100644
index 0000000..6b20f01
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-iso8859-1.txx
@@ -0,0 +1,101 @@
+// file : xsd/cxx/xml/char-iso8859-1.txx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#include <xsd/cxx/auto-array.hxx>
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ template <typename C>
+ C char_iso8859_1_transcoder<C>::unrep_char_ = 0;
+
+ template <typename C>
+ std::basic_string<C> char_iso8859_1_transcoder<C>::
+ to (const XMLCh* s, std::size_t len)
+ {
+ const XMLCh* end (s + len);
+
+ // Find what the resulting buffer size will be.
+ //
+ std::size_t rl (0);
+ unsigned int u (0); // Four byte UCS-4 char.
+
+ bool valid (true);
+ const XMLCh* p (s);
+
+ for (; p < end; ++p)
+ {
+ if (*p >= 0xD800 && *p <= 0xDBFF)
+ {
+ // Make sure we have one more char and it has a valid
+ // value for the second char in a surrogate pair.
+ //
+ if (++p == end || !((*p >= 0xDC00) && (*p <= 0xDFFF)))
+ {
+ valid = false;
+ break;
+ }
+ }
+
+ rl++;
+ }
+
+ if (!valid)
+ throw invalid_utf16_string ();
+
+ std::basic_string<C> r;
+ r.reserve (rl + 1);
+ r.resize (rl);
+ C* rs (const_cast<C*> (r.c_str ()));
+ std::size_t i (0);
+
+ p = s;
+
+ // Tight first loop for the common case.
+ //
+ for (; p < end && *p < 0x100; ++p)
+ rs[i++] = C (*p);
+
+ if (p < end && unrep_char_ == 0)
+ throw iso8859_1_unrepresentable ();
+
+ for (; p < end; ++p)
+ {
+ XMLCh x (*p);
+
+ if ((x >= 0xD800) && (x <= 0xDBFF))
+ {
+ u = ((x - 0xD800) << 10) + (*++p - 0xDC00) + 0x10000;
+ }
+ else
+ u = x;
+
+ rs[i++] = u < 0x100 ? C (u) : unrep_char_;
+ }
+
+ return r;
+ }
+
+ template <typename C>
+ XMLCh* char_iso8859_1_transcoder<C>::
+ from (const C* s, std::size_t len)
+ {
+ const C* end (s + len);
+
+ auto_array<XMLCh> r (new XMLCh[len + 1]);
+ XMLCh* ir (r.get ());
+
+ for (const C* p (s); p < end; ++p)
+ *ir++ = static_cast<unsigned char> (*p);
+
+ *ir = XMLCh (0);
+ return r.release ();
+ }
+ }
+ }
+}
diff --git a/libxsd/xsd/cxx/xml/char-lcp.hxx b/libxsd/xsd/cxx/xml/char-lcp.hxx
new file mode 100644
index 0000000..2c41753
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-lcp.hxx
@@ -0,0 +1,56 @@
+// file : xsd/cxx/xml/char-lcp.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#ifndef XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER_CHAR_LCP
+
+#include <string>
+#include <cstddef> // std::size_t
+
+#include <xercesc/util/XercesDefs.hpp> // XMLCh
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ // UTF-16 to/from Xerces-C++ local code page (LCP) transcoder.
+ //
+ // Note that this transcoder has a custom interface due to Xerces-C++
+ // idiosyncrasies. Don't use it as a base for your custom transcoder.
+ //
+ template <typename C>
+ struct char_lcp_transcoder
+ {
+ static std::basic_string<C>
+ to (const XMLCh* s);
+
+ static std::basic_string<C>
+ to (const XMLCh* s, std::size_t length);
+
+ static XMLCh*
+ from (const C* s);
+ };
+
+ typedef char_lcp_transcoder<char> char_transcoder;
+ }
+ }
+}
+
+#include <xsd/cxx/xml/char-lcp.txx>
+
+#else
+# ifndef XSD_CXX_XML_TRANSCODER_CHAR_LCP
+ //
+ // If you get this error, it usually means that either you compiled
+ // your schemas with different --char-encoding values or you included
+ // some of the libxsd headers (e.g., xsd/cxx/xml/string.hxx) directly
+ // without first including the correct xsd/cxx/xml/char-*.hxx header.
+ //
+# error conflicting character encoding detected
+# endif
+#endif // XSD_CXX_XML_TRANSCODER
diff --git a/libxsd/xsd/cxx/xml/char-lcp.txx b/libxsd/xsd/cxx/xml/char-lcp.txx
new file mode 100644
index 0000000..01bb36e
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-lcp.txx
@@ -0,0 +1,55 @@
+// file : xsd/cxx/xml/char-lcp.txx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#include <cstring> // std::memcpy
+
+#include <xercesc/util/XMLString.hpp>
+
+#include <xsd/cxx/auto-array.hxx>
+#include <xsd/cxx/xml/std-memory-manager.hxx>
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ template <typename C>
+ std::basic_string<C> char_lcp_transcoder<C>::
+ to (const XMLCh* s)
+ {
+ std_memory_manager mm;
+ auto_array<C, std_memory_manager> r (
+ xercesc::XMLString::transcode (s, &mm), mm);
+ return std::basic_string<C> (r.get ());
+ }
+
+ template <typename C>
+ std::basic_string<C> char_lcp_transcoder<C>::
+ to (const XMLCh* s, std::size_t len)
+ {
+ auto_array<XMLCh> tmp (new XMLCh[len + 1]);
+ std::memcpy (tmp.get (), s, len * sizeof (XMLCh));
+ tmp[len] = XMLCh (0);
+
+ std_memory_manager mm;
+ auto_array<C, std_memory_manager> r (
+ xercesc::XMLString::transcode (tmp.get (), &mm), mm);
+
+ tmp.reset ();
+
+ return std::basic_string<C> (r.get ());
+ }
+
+ template <typename C>
+ XMLCh* char_lcp_transcoder<C>::
+ from (const C* s)
+ {
+ std_memory_manager mm;
+ return xercesc::XMLString::transcode (s, &mm);
+ }
+ }
+ }
+}
diff --git a/libxsd/xsd/cxx/xml/char-utf8.hxx b/libxsd/xsd/cxx/xml/char-utf8.hxx
new file mode 100644
index 0000000..c255b28
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-utf8.hxx
@@ -0,0 +1,57 @@
+// file : xsd/cxx/xml/char-utf8.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#ifndef XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER
+#define XSD_CXX_XML_TRANSCODER_CHAR_UTF8
+
+#include <string>
+#include <cstddef> // std::size_t
+
+#include <xercesc/util/XercesDefs.hpp> // XMLCh
+
+#include <xsd/cxx/xml/exceptions.hxx> // invalid_utf16_string
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ struct invalid_utf8_string {};
+
+ // UTF-16 to/from UTF-8 transcoder.
+ //
+ template <typename C>
+ struct char_utf8_transcoder
+ {
+ static std::basic_string<C>
+ to (const XMLCh* s, std::size_t length);
+
+ static XMLCh*
+ from (const C* s, std::size_t length);
+
+ private:
+ static const unsigned char first_byte_mask_[5];
+ };
+
+ typedef char_utf8_transcoder<char> char_transcoder;
+ }
+ }
+}
+
+#include <xsd/cxx/xml/char-utf8.txx>
+
+#else
+# ifndef XSD_CXX_XML_TRANSCODER_CHAR_UTF8
+ //
+ // If you get this error, it usually means that either you compiled
+ // your schemas with different --char-encoding values or you included
+ // some of the libxsd headers (e.g., xsd/cxx/xml/string.hxx) directly
+ // without first including the correct xsd/cxx/xml/char-*.hxx header.
+ //
+# error conflicting character encoding detected
+# endif
+#endif // XSD_CXX_XML_TRANSCODER
diff --git a/libxsd/xsd/cxx/xml/char-utf8.txx b/libxsd/xsd/cxx/xml/char-utf8.txx
new file mode 100644
index 0000000..96b36a4
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/char-utf8.txx
@@ -0,0 +1,293 @@
+// file : xsd/cxx/xml/char-utf8.txx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#include <xsd/cxx/auto-array.hxx>
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ template <typename C>
+ const unsigned char char_utf8_transcoder<C>::first_byte_mask_[5] =
+ {
+ 0x00, 0x00, 0xC0, 0xE0, 0xF0
+ };
+
+ template <typename C>
+ std::basic_string<C> char_utf8_transcoder<C>::
+ to (const XMLCh* s, std::size_t len)
+ {
+ const XMLCh* end (s + len);
+
+ // Find what the resulting buffer size will be.
+ //
+ std::size_t rl (0);
+ unsigned int u (0); // Four byte UCS-4 char.
+
+ bool valid (true);
+ const XMLCh* p (s);
+
+ for (; p < end; ++p)
+ {
+ XMLCh x (*p);
+
+ if (x < 0xD800 || x > 0xDBFF)
+ u = x;
+ else
+ {
+ // Make sure we have one more char and it has a valid
+ // value for the second char in a surrogate pair.
+ //
+ if (++p == end || !((*p >= 0xDC00) && (*p <= 0xDFFF)))
+ {
+ valid = false;
+ break;
+ }
+
+ u = ((x - 0xD800) << 10) + (*p - 0xDC00) + 0x10000;
+ }
+
+ if (u < 0x80)
+ rl++;
+ else if (u < 0x800)
+ rl += 2;
+ else if (u < 0x10000)
+ rl += 3;
+ else if (u < 0x110000)
+ rl += 4;
+ else
+ {
+ valid = false;
+ break;
+ }
+ }
+
+ if (!valid)
+ throw invalid_utf16_string ();
+
+ std::basic_string<C> r;
+ r.reserve (rl + 1);
+ r.resize (rl);
+ C* rs (const_cast<C*> (r.c_str ()));
+
+ std::size_t i (0);
+ unsigned int count (0);
+
+ p = s;
+
+ // Tight first loop for the common case.
+ //
+ for (; p < end && *p < 0x80; ++p)
+ rs[i++] = C (*p);
+
+ for (; p < end; ++p)
+ {
+ XMLCh x (*p);
+
+ if ((x >= 0xD800) && (x <= 0xDBFF))
+ {
+ u = ((x - 0xD800) << 10) + (*++p - 0xDC00) + 0x10000;
+ }
+ else
+ u = x;
+
+ if (u < 0x80)
+ count = 1;
+ else if (u < 0x800)
+ count = 2;
+ else if (u < 0x10000)
+ count = 3;
+ else if (u < 0x110000)
+ count = 4;
+
+ switch(count)
+ {
+ case 4:
+ {
+ rs[i + 3] = C ((u | 0x80UL) & 0xBFUL);
+ u >>= 6;
+ }
+ case 3:
+ {
+ rs[i + 2] = C ((u | 0x80UL) & 0xBFUL);
+ u >>= 6;
+ }
+ case 2:
+ {
+ rs[i + 1] = C ((u | 0x80UL) & 0xBFUL);
+ u >>= 6;
+ }
+ case 1:
+ {
+ rs[i] = C (u | first_byte_mask_[count]);
+ }
+ }
+
+ i += count;
+ }
+
+ return r;
+ }
+
+ template <typename C>
+ XMLCh* char_utf8_transcoder<C>::
+ from (const C* s, std::size_t len)
+ {
+ bool valid (true);
+ const C* end (s + len);
+
+ // Find what the resulting buffer size will be.
+ //
+ std::size_t rl (0);
+ unsigned int count (0);
+
+ for (const C* p (s); p < end; ++p)
+ {
+ unsigned char c (*p);
+
+ if (c < 0x80)
+ {
+ // Fast path.
+ //
+ rl += 1;
+ continue;
+ }
+ else if ((c >> 5) == 0x06)
+ count = 2;
+ else if ((c >> 4) == 0x0E)
+ count = 3;
+ else if ((c >> 3) == 0x1E)
+ count = 4;
+ else
+ {
+ valid = false;
+ break;
+ }
+
+ p += count - 1; // One will be added in the for loop
+
+ if (p + 1 > end)
+ {
+ valid = false;
+ break;
+ }
+
+ // BMP is represented by up to 3 bytes in UTF-8.
+ //
+ rl += count > 3 ? 2 : 1;
+ }
+
+ if (!valid)
+ throw invalid_utf8_string ();
+
+ auto_array<XMLCh> r (new XMLCh[rl + 1]);
+ XMLCh* ir (r.get ());
+
+ unsigned int u (0); // Four byte UCS-4 char.
+
+ for (const C* p (s); p < end; ++p)
+ {
+ unsigned char c (*p);
+
+ if (c < 0x80)
+ {
+ // Fast path.
+ //
+ *ir++ = static_cast<XMLCh> (c);
+ continue;
+ }
+ else if ((c >> 5) == 0x06)
+ {
+ // UTF-8: 110yyyyy 10zzzzzz
+ // Unicode: 00000yyy yyzzzzzz
+ //
+ u = (c & 0x1F) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u |= c & 0x3F;
+ }
+ else if ((c >> 4) == 0x0E)
+ {
+ // UTF-8: 1110xxxx 10yyyyyy 10zzzzzz
+ // Unicode: xxxxyyyy yyzzzzzz
+ //
+ u = (c & 0x0F) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u = (u | (c & 0x3F)) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u |= c & 0x3F;
+ }
+ else if ((c >> 3) == 0x1E)
+ {
+ // UTF-8: 11110www 10xxxxxx 10yyyyyy 10zzzzzz
+ // Unicode: 000wwwxx xxxxyyyy yyzzzzzz
+ //
+ u = (c & 0x07) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u = (u | (c & 0x3F)) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u = (u | (c & 0x3F)) << 6;
+
+ c = *++p;
+ if ((c >> 6) != 2)
+ {
+ valid = false;
+ break;
+ }
+ u |= c & 0x3F;
+ }
+
+ if (u & 0xFFFF0000)
+ {
+ // Surrogate pair.
+ //
+ *ir++ = static_cast<XMLCh> (((u - 0x10000) >> 10) + 0xD800);
+ *ir++ = static_cast<XMLCh> ((u & 0x3FF) + 0xDC00);
+ }
+ else
+ *ir++ = static_cast<XMLCh> (u);
+ }
+
+ if (!valid)
+ throw invalid_utf8_string ();
+
+ *ir = XMLCh (0);
+
+ return r.release ();
+ }
+ }
+ }
+}
diff --git a/libxsd/xsd/cxx/xml/exceptions.hxx b/libxsd/xsd/cxx/xml/exceptions.hxx
new file mode 100644
index 0000000..6c2e029
--- /dev/null
+++ b/libxsd/xsd/cxx/xml/exceptions.hxx
@@ -0,0 +1,20 @@
+// file : xsd/cxx/xml/exceptions.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#ifndef XSD_CXX_XML_EXCEPTIONS_HXX
+#define XSD_CXX_XML_EXCEPTIONS_HXX
+
+namespace xsd
+{
+ namespace cxx
+ {
+ namespace xml
+ {
+ struct invalid_utf16_string {};
+ }
+ }
+}
+
+#endif // XSD_CXX_XML_EXCEPTIONS_HXX
diff --git a/libxsd/xsd/cxx/xml/string.hxx b/libxsd/xsd/cxx/xml/string.hxx
index 2d08134..ec666ee 100644
--- a/libxsd/xsd/cxx/xml/string.hxx
+++ b/libxsd/xsd/cxx/xml/string.hxx
@@ -7,6 +7,7 @@
#define XSD_CXX_XML_STRING_HXX
#include <string>
+#include <cstddef> // std::size_t
#include <xsd/cxx/auto-array.hxx>
#include <xercesc/util/XercesDefs.hpp> // XMLCh
@@ -17,12 +18,6 @@ namespace xsd
{
namespace xml
{
- //
- //
- struct invalid_utf8_string {};
- struct invalid_utf16_string {};
-
-
// Transcode a null-terminated string.
//
template <typename C>
@@ -84,7 +79,7 @@ namespace xsd
}
}
-#endif // XSD_CXX_XML_STRING_HXX
+#endif // XSD_CXX_XML_STRING_HXX
#include <xsd/cxx/xml/string.ixx>
#include <xsd/cxx/xml/string.txx>
diff --git a/libxsd/xsd/cxx/xml/string.ixx b/libxsd/xsd/cxx/xml/string.ixx
index bde86d8..056a15f 100644
--- a/libxsd/xsd/cxx/xml/string.ixx
+++ b/libxsd/xsd/cxx/xml/string.ixx
@@ -6,11 +6,13 @@
#ifndef XSD_CXX_XML_STRING_IXX
#define XSD_CXX_XML_STRING_IXX
-#include <cassert>
-#include <cstring> // std::memcpy
-
#include <xercesc/util/XMLString.hpp>
-#include <xsd/cxx/xml/std-memory-manager.hxx>
+
+// If no transcoder has been included, use the default UTF-8.
+//
+#ifndef XSD_CXX_XML_TRANSCODER
+# include <xsd/cxx/xml/char-utf8.hxx>
+#endif
// We sometimes need this functionality even if we are building for
// wchar_t.
@@ -21,43 +23,17 @@ namespace xsd
{
namespace xml
{
-#ifndef XSD_USE_LCP
- namespace bits
- {
- // UTF-16 to/from UTF-8 transcoder.
- //
- template <typename C>
- struct char_transcoder
- {
- static std::basic_string<C>
- to (const XMLCh* s, std::size_t length);
-
- static XMLCh*
- from (const C* s, std::size_t length);
-
- private:
- static const unsigned char first_byte_mask_[5];
- };
- }
-#endif
-
template <>
inline std::basic_string<char>
transcode<char> (const XMLCh* s)
{
- if (s == 0)
+ if (s == 0 || *s == XMLCh (0))
return std::basic_string<char> ();
-#ifndef XSD_USE_LCP
- return bits::char_transcoder<char>::to (
- s, xercesc::XMLString::stringLen (s));
+#ifndef XSD_CXX_XML_TRANSCODER_CHAR_LCP
+ return char_transcoder::to (s, xercesc::XMLString::stringLen (s));
#else
- // Use Xerces-C++ local code page transcoding.
- //
- std_memory_manager mm;
- auto_array<char, std_memory_manager> r (
- xercesc::XMLString::transcode (s, &mm), mm);
- return std::basic_string<char> (r.get ());
+ return char_transcoder::to (s);
#endif
}
@@ -68,41 +44,17 @@ namespace xsd
if (s == 0 || len == 0)
return std::basic_string<char> ();
-#ifndef XSD_USE_LCP
- // Convert UTF-16 to UTF-8
- //
- return bits::char_transcoder<char>::to (s, len);
-#else
- // Use Xerces-C++ local code page transcoding.
- //
- auto_array<XMLCh> tmp (new XMLCh[len + 1]);
- std::memcpy (tmp.get (), s, len * sizeof (XMLCh));
- tmp[len] = XMLCh (0);
-
- std_memory_manager mm;
- auto_array<char, std_memory_manager> r (
- xercesc::XMLString::transcode (tmp.get (), &mm), mm);
-
- tmp.reset ();
-
- return std::basic_string<char> (r.get ());
-#endif
+ return char_transcoder::to (s, len);
}
template <>
inline XMLCh*
transcode_to_xmlch (const char* s)
{
-#ifndef XSD_USE_LCP
- // Convert UTF-8 to UTF-16
- //
- return bits::char_transcoder<char>::from (
- s, std::char_traits<char>::length (s));
+#ifndef XSD_CXX_XML_TRANSCODER_CHAR_LCP
+ return char_transcoder::from (s, std::char_traits<char>::length (s));
#else
- // Use Xerces-C++ local code page transcoding.
- //
- std_memory_manager mm;
- return xercesc::XMLString::transcode (s, &mm);
+ return char_transcoder::from (s);
#endif
}
@@ -110,16 +62,10 @@ namespace xsd
inline XMLCh*
transcode_to_xmlch (const std::basic_string<char>& s)
{
-#ifndef XSD_USE_LCP
- // Convert UTF-8 to UTF-16
- //
- return bits::char_transcoder<char>::from (
- s.c_str (), s.length ());
+#ifndef XSD_CXX_XML_TRANSCODER_CHAR_LCP
+ return char_transcoder::from (s.c_str (), s.length ());
#else
- // Use Xerces-C++ local code page transcoding.
- //
- std_memory_manager mm;
- return xercesc::XMLString::transcode (s.c_str (), &mm);
+ return char_transcoder::from (s.c_str ());
#endif
}
}
diff --git a/libxsd/xsd/cxx/xml/string.txx b/libxsd/xsd/cxx/xml/string.txx
index cdef87e..f71480e 100644
--- a/libxsd/xsd/cxx/xml/string.txx
+++ b/libxsd/xsd/cxx/xml/string.txx
@@ -6,306 +6,16 @@
#ifndef XSD_CXX_XML_STRING_TXX
#define XSD_CXX_XML_STRING_TXX
-#ifndef XSD_USE_LCP
-namespace xsd
-{
- namespace cxx
- {
- namespace xml
- {
- namespace bits
- {
- template <typename C>
- const unsigned char char_transcoder<C>::first_byte_mask_[5] =
- {
- 0x00, 0x00, 0xC0, 0xE0, 0xF0
- };
-
- template <typename C>
- std::basic_string<C> char_transcoder<C>::
- to (const XMLCh* s, std::size_t len)
- {
- const XMLCh* end (s + len);
-
- // Find what the resulting buffer size will be.
- //
- std::size_t rl (0);
- unsigned int u (0); // Four byte UCS-4 char.
-
- bool valid (true);
- const XMLCh* p (s);
- for (; p < end; ++p)
- {
- XMLCh x (*p);
-
- if (x < 0xD800 || x > 0xDBFF)
- u = x;
- else
- {
- // Make sure we have one more char and it has a valid
- // value for the second char in a surrogate pair.
- //
- if (++p == end || !((*p >= 0xDC00) && (*p <= 0xDFFF)))
- {
- valid = false;
- break;
- }
-
- u = ((x - 0xD800) << 10) + (*p - 0xDC00) + 0x10000;
- }
-
- if (u < 0x80)
- rl++;
- else if (u < 0x800)
- rl += 2;
- else if (u < 0x10000)
- rl += 3;
- else if (u < 0x110000)
- rl += 4;
- else
- {
- valid = false;
- break;
- }
- }
-
- if (!valid)
- throw invalid_utf16_string ();
-
- std::basic_string<C> r;
- r.reserve (rl + 1);
- r.resize (rl);
- C* rs (const_cast<C*> (r.c_str ()));
-
- std::size_t i (0);
- unsigned int count (0);
-
- p = s;
-
- // Tight first loop for the common case.
- //
- for (; p < end && *p < 0x80; ++p)
- rs[i++] = C (*p);
-
- for (; p < end; ++p)
- {
- XMLCh x (*p);
-
- if ((x >= 0xD800) && (x <= 0xDBFF))
- {
- u = ((x - 0xD800) << 10) + (*++p - 0xDC00) + 0x10000;
- }
- else
- u = x;
-
- if (u < 0x80)
- count = 1;
- else if (u < 0x800)
- count = 2;
- else if (u < 0x10000)
- count = 3;
- else if (u < 0x110000)
- count = 4;
-
- switch(count)
- {
- case 4:
- {
- rs[i + 3] = C ((u | 0x80UL) & 0xBFUL);
- u >>= 6;
- }
- case 3:
- {
- rs[i + 2] = C ((u | 0x80UL) & 0xBFUL);
- u >>= 6;
- }
- case 2:
- {
- rs[i + 1] = C ((u | 0x80UL) & 0xBFUL);
- u >>= 6;
- }
- case 1:
- {
- rs[i] = C (u | first_byte_mask_[count]);
- }
- }
-
- i += count;
- }
-
- return r;
- }
-
- template <typename C>
- XMLCh* char_transcoder<C>::
- from (const C* s, std::size_t len)
- {
- bool valid (true);
- const C* end (s + len);
-
- // Find what the resulting buffer size will be.
- //
- std::size_t rl (0);
- unsigned int count (0);
-
- for (const C* p (s); p < end; ++p)
- {
- unsigned char c (*p);
-
- if (c < 0x80)
- {
- // Fast path.
- //
- rl += 1;
- continue;
- }
- else if ((c >> 5) == 0x06)
- count = 2;
- else if ((c >> 4) == 0x0E)
- count = 3;
- else if ((c >> 3) == 0x1E)
- count = 4;
- else
- {
- valid = false;
- break;
- }
-
- p += count - 1; // One will be added in the for loop
-
- if (p + 1 > end)
- {
- valid = false;
- break;
- }
-
- // BMP is represented by up to 3 code points in UTF-8.
- //
- rl += count > 3 ? 2 : 1;
- }
-
- if (!valid)
- throw invalid_utf8_string ();
-
- auto_array<XMLCh> r (new XMLCh[rl + 1]);
- XMLCh* ir (r.get ());
-
- unsigned int u (0); // Four byte UCS-4 char.
-
- for (const C* p (s); p < end; ++p)
- {
- unsigned char c (*p);
-
- if (c < 0x80)
- {
- // Fast path.
- //
- *ir++ = static_cast<XMLCh> (c);
- continue;
- }
- else if ((c >> 5) == 0x06)
- {
- // UTF-8: 110yyyyy 10zzzzzz
- // Unicode: 00000yyy yyzzzzzz
- //
- u = (c & 0x1F) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u |= c & 0x3F;
- }
- else if ((c >> 4) == 0x0E)
- {
- // UTF-8: 1110xxxx 10yyyyyy 10zzzzzz
- // Unicode: xxxxyyyy yyzzzzzz
- //
- u = (c & 0x0F) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u = (u | (c & 0x3F)) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u |= c & 0x3F;
- }
- else if ((c >> 3) == 0x1E)
- {
- // UTF-8: 000wwwxx xxxxyyyy yyzzzzzz
- // Unicode: 11110www 10xxxxxx 10yyyyyy 10zzzzzz
- //
- u = (c & 0x07) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u = (u | (c & 0x3F)) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u = (u | (c & 0x3F)) << 6;
-
- c = *++p;
- if ((c >> 6) != 2)
- {
- valid = false;
- break;
- }
- u |= c & 0x3F;
- }
-
- if (u & 0xFFFF0000)
- {
- // Surrogate pair.
- //
- *ir++ = static_cast<XMLCh> (((u - 0x10000) >> 10) + 0xD800);
- *ir++ = static_cast<XMLCh> ((u & 0x3FF) + 0xDC00);
- }
- else
- *ir++ = static_cast<XMLCh> (u);
- }
-
- if (!valid)
- throw invalid_utf8_string ();
-
- *ir = XMLCh (0);
-
- return r.release ();
- }
- }
- }
- }
-}
-
-#endif // XSD_USE_LCP
#endif // XSD_CXX_XML_STRING_TXX
-
#if defined(XSD_USE_WCHAR) || !defined(XSD_USE_CHAR)
#ifndef XSD_CXX_XML_STRING_TXX_WCHAR
#define XSD_CXX_XML_STRING_TXX_WCHAR
+#include <xsd/cxx/xml/exceptions.hxx>
+
namespace xsd
{
namespace cxx
diff --git a/tests/cxx/tree/encoding/char/iso-8859-1/driver.cxx b/tests/cxx/tree/encoding/char/iso-8859-1/driver.cxx
new file mode 100644
index 0000000..9bd5725
--- /dev/null
+++ b/tests/cxx/tree/encoding/char/iso-8859-1/driver.cxx
@@ -0,0 +1,76 @@
+// file : tests/cxx/tree/encoding/char/iso-8859-1/driver.cxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2006-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+// Test ISO-8859-1 encoding.
+//
+
+#include <memory> // std::auto_ptr
+#include <fstream>
+#include <iostream>
+
+#include "test.hxx"
+
+using namespace std;
+using namespace test;
+
+int
+main (int argc, char* argv[])
+{
+ if (argc != 2)
+ {
+ cerr << "usage: " << argv[0] << " test.xml" << endl;
+ return 1;
+ }
+
+ try
+ {
+ try
+ {
+ root (argv[1]);
+ return 1;
+ }
+ catch (xsd::cxx::xml::iso8859_1_unrepresentable const&)
+ {
+ }
+
+ xsd::cxx::xml::char_transcoder::unrep_char ('?');
+ auto_ptr<type> r (root (argv[1]));
+
+ {
+ type::a_sequence const& s (r->a ());
+
+ if (s[0] != "abc" ||
+ s[1] != "\xE6" ||
+ s[2] != "\xA2\xA3\xA4\xA5" ||
+ s[3] != "???")
+ {
+ cerr << "invalid encoding" << endl;
+ return 1;
+ }
+ }
+
+ {
+ type::b_sequence const& s (r->b ());
+
+ if (s[0] != strenum::abc ||
+ s[1] != strenum::a_c ||
+ s[2] != strenum::cxx__bc)
+ {
+ cerr << "invalid encoding" << endl;
+ return 1;
+ }
+ }
+
+ xml_schema::namespace_infomap map;
+ map["t"].name = "test";
+
+ root (std::cout, *r, map, "ISO-8859-1");
+ }
+ catch (xml_schema::exception const& e)
+ {
+ cerr << "xml_schema::exception: " << e.what () << endl;
+ return 1;
+ }
+}
diff --git a/tests/cxx/tree/encoding/char/iso-8859-1/makefile b/tests/cxx/tree/encoding/char/iso-8859-1/makefile
new file mode 100644
index 0000000..dd48fc1
--- /dev/null
+++ b/tests/cxx/tree/encoding/char/iso-8859-1/makefile
@@ -0,0 +1,83 @@
+# file : tests/cxx/tree/encoding/char/iso-8859-1/makefile
+# author : Boris Kolpackov <boris@codesynthesis.com>
+# copyright : Copyright (c) 2006-2009 Code Synthesis Tools CC
+# license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+include $(dir $(lastword $(MAKEFILE_LIST)))../../../../../../build/bootstrap.make
+
+xsd := test.xsd
+cxx := driver.cxx
+
+obj := $(addprefix $(out_base)/,$(cxx:.cxx=.o) $(xsd:.xsd=.o))
+dep := $(obj:.o=.o.d)
+
+driver := $(out_base)/driver
+test := $(out_base)/.test
+clean := $(out_base)/.clean
+
+
+# Import.
+#
+$(call import,\
+ $(scf_root)/import/libxerces-c/stub.make,\
+ l: xerces_c.l,cpp-options: xerces_c.l.cpp-options)
+
+
+# Build.
+#
+$(driver): $(obj) $(xerces_c.l)
+
+$(obj) $(dep): cpp_options := -I$(src_root)/libxsd
+$(obj) $(dep): $(xerces_c.l.cpp-options)
+
+genf := $(xsd:.xsd=.hxx) $(xsd:.xsd=.ixx) $(xsd:.xsd=.cxx)
+gen := $(addprefix $(out_base)/,$(genf))
+
+$(gen): xsd := $(out_root)/xsd/xsd
+$(gen): xsd_options := --char-encoding iso8859-1 --generate-serialization \
+--generate-doxygen
+$(gen): $(out_root)/xsd/xsd
+
+$(call include-dep,$(dep))
+
+# Convenience alias for default target.
+#
+$(out_base)/: $(driver)
+
+
+# Test.
+#
+$(test): driver := $(driver)
+$(test): $(driver) $(src_base)/test.xml $(src_base)/test.std
+ $(call message,test $$1,$$1 $(src_base)/test.xml | diff -u $(src_base)/test.std -,$(driver))
+
+# Clean.
+#
+$(clean): $(driver).o.clean \
+ $(addsuffix .cxx.clean,$(obj)) \
+ $(addsuffix .cxx.clean,$(dep)) \
+ $(addprefix $(out_base)/,$(xsd:.xsd=.cxx.xsd.clean))
+
+# Generated .gitignore.
+#
+ifeq ($(out_base),$(src_base))
+$(gen): | $(out_base)/.gitignore
+$(driver): | $(out_base)/.gitignore
+
+$(out_base)/.gitignore: files := driver $(genf)
+$(clean): $(out_base)/.gitignore.clean
+
+$(call include,$(bld_root)/git/gitignore.make)
+endif
+
+# How to.
+#
+$(call include,$(bld_root)/cxx/o-e.make)
+$(call include,$(bld_root)/cxx/cxx-o.make)
+$(call include,$(bld_root)/cxx/cxx-d.make)
+$(call include,$(scf_root)/xsd/tree/xsd-cxx.make)
+
+
+# Dependencies.
+#
+$(call import,$(src_root)/xsd/makefile)
diff --git a/tests/cxx/tree/encoding/char/iso-8859-1/test.std b/tests/cxx/tree/encoding/char/iso-8859-1/test.std
new file mode 100644
index 0000000..ca6297f
--- /dev/null
+++ b/tests/cxx/tree/encoding/char/iso-8859-1/test.std
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="ISO-8859-1" standalone="no" ?>
+<t:root xmlns:t="test">
+
+ <a>abc</a>
+
+ <a>æ</a>
+
+ <a>¢£¤¥</a>
+
+ <a>???</a>
+
+ <b>abc</b>
+
+ <b>aâc</b>
+
+ <b>âòbc</b>
+
+</t:root>
diff --git a/tests/cxx/tree/encoding/char/iso-8859-1/test.xml b/tests/cxx/tree/encoding/char/iso-8859-1/test.xml
new file mode 100644
index 0000000..9c9e752
--- /dev/null
+++ b/tests/cxx/tree/encoding/char/iso-8859-1/test.xml
@@ -0,0 +1,14 @@
+<t:root xmlns:t="test"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="test test.xsd">
+
+ <a>abc</a>
+ <a>&#xE6;</a>
+ <a>&#xA2;&#xA3;&#xA4;&#xA5;</a>
+ <a>&#x100;&#xAAAA;&#xAAAAA;</a>
+
+ <b>abc</b>
+ <b>a&#xE2;c</b>
+ <b>&#xE2;&#xF2;bc</b>
+
+</t:root>
diff --git a/tests/cxx/tree/encoding/char/iso-8859-1/test.xsd b/tests/cxx/tree/encoding/char/iso-8859-1/test.xsd
new file mode 100644
index 0000000..31b8901
--- /dev/null
+++ b/tests/cxx/tree/encoding/char/iso-8859-1/test.xsd
@@ -0,0 +1,31 @@
+<?xml version="1.0"?>
+<schema xmlns="http://www.w3.org/2001/XMLSchema" xmlns:t="test" targetNamespace="test">
+
+ <simpleType name="strenum">
+
+ <annotation>
+ <documentation>
+ Test enum. Valid values are:
+ abc
+ a&#xE2;c
+ &#xE2;&#xF2;bc
+ </documentation>
+ </annotation>
+
+ <restriction base="string">
+ <enumeration value="abc"/>
+ <enumeration value="a&#xE2;c"/>
+ <enumeration value="&#xE2;&#xF2;bc"/>
+ </restriction>
+ </simpleType>
+
+ <complexType name="type">
+ <sequence>
+ <element name="a" type="string" maxOccurs="unbounded"/>
+ <element name="b" type="t:strenum" maxOccurs="unbounded"/>
+ </sequence>
+ </complexType>
+
+ <element name="root" type="t:type"/>
+
+</schema>
diff --git a/tests/cxx/tree/encoding/char/lcp/driver.cxx b/tests/cxx/tree/encoding/char/lcp/driver.cxx
index 3d30aa9..7bc4a2d 100644
--- a/tests/cxx/tree/encoding/char/lcp/driver.cxx
+++ b/tests/cxx/tree/encoding/char/lcp/driver.cxx
@@ -3,7 +3,7 @@
// copyright : Copyright (c) 2006-2009 Code Synthesis Tools CC
// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
-// Test local code page encoding (XSD_USE_LCP defined).
+// Test local code page encoding (--char-encoding lcp).
// The test just makes sure it still compiles and works.
//
diff --git a/tests/cxx/tree/encoding/char/lcp/makefile b/tests/cxx/tree/encoding/char/lcp/makefile
index 9520a42..324e331 100644
--- a/tests/cxx/tree/encoding/char/lcp/makefile
+++ b/tests/cxx/tree/encoding/char/lcp/makefile
@@ -27,14 +27,14 @@ $(call import,\
#
$(driver): $(obj) $(xerces_c.l)
-$(obj) $(dep): cpp_options := -I$(src_root)/libxsd -DXSD_USE_LCP
+$(obj) $(dep): cpp_options := -I$(src_root)/libxsd
$(obj) $(dep): $(xerces_c.l.cpp-options)
genf := $(xsd:.xsd=.hxx) $(xsd:.xsd=.ixx) $(xsd:.xsd=.cxx)
gen := $(addprefix $(out_base)/,$(genf))
$(gen): xsd := $(out_root)/xsd/xsd
-$(gen): xsd_options := --generate-serialization
+$(gen): xsd_options := --generate-serialization --char-encoding lcp
$(gen): $(out_root)/xsd/xsd
$(call include-dep,$(dep))
diff --git a/tests/cxx/tree/encoding/char/makefile b/tests/cxx/tree/encoding/char/makefile
index 78b6e7a..ef25ad3 100644
--- a/tests/cxx/tree/encoding/char/makefile
+++ b/tests/cxx/tree/encoding/char/makefile
@@ -5,7 +5,7 @@
include $(dir $(lastword $(MAKEFILE_LIST)))../../../../../build/bootstrap.make
-tests := lcp utf-8
+tests := lcp utf-8 iso-8859-1
default := $(out_base)/
test := $(out_base)/.test
diff --git a/tests/cxx/tree/encoding/char/utf-8/makefile b/tests/cxx/tree/encoding/char/utf-8/makefile
index 9fbbc7c..da5d7b4 100644
--- a/tests/cxx/tree/encoding/char/utf-8/makefile
+++ b/tests/cxx/tree/encoding/char/utf-8/makefile
@@ -1,4 +1,4 @@
-# file : tests/cxx/tree/encoding/char/lcp/makefile
+# file : tests/cxx/tree/encoding/char/utf-8/makefile
# author : Boris Kolpackov <boris@codesynthesis.com>
# copyright : Copyright (c) 2006-2009 Code Synthesis Tools CC
# license : GNU GPL v2 + exceptions; see accompanying LICENSE file
diff --git a/xsd/cxx/elements.cxx b/xsd/cxx/elements.cxx
index fd23fc0..764d7da 100644
--- a/xsd/cxx/elements.cxx
+++ b/xsd/cxx/elements.cxx
@@ -8,7 +8,9 @@
#include <backend-elements/regex.hxx>
#include <cctype> // std::toupper
+#include <memory>
#include <sstream>
+#include <fstream>
#include <iostream>
using std::wcerr;
@@ -111,7 +113,9 @@ namespace CXX
Context::
Context (std::wostream& o,
SemanticGraph::Schema& root,
+ StringLiteralMap const* string_literal_map_,
NarrowString const& char_type__,
+ NarrowString const& char_encoding__,
Boolean include_with_brackets__,
NarrowString const& include_prefix__,
NarrowString const& esymbol,
@@ -125,8 +129,10 @@ namespace CXX
: os (o),
schema_root (root),
char_type (char_type_),
+ char_encoding (char_encoding_),
L (L_),
string_type (string_type_),
+ string_literal_map (string_literal_map_),
include_with_brackets (include_with_brackets_),
include_prefix (include_prefix_),
type_exp (type_exp_),
@@ -135,6 +141,7 @@ namespace CXX
ns_mapping_cache (ns_mapping_cache_),
xs_ns_ (0),
char_type_ (char_type__),
+ char_encoding_ (char_encoding__),
L_ (char_type == L"wchar_t" ? L"L" : L""),
include_with_brackets_ (include_with_brackets__),
include_prefix_ (include_prefix__),
@@ -177,7 +184,7 @@ namespace CXX
xs_ns_ = dynamic_cast<SemanticGraph::Namespace*> (n);
}
- //
+ // String type.
//
if (char_type == L"char")
string_type_ = L"::std::string";
@@ -186,6 +193,16 @@ namespace CXX
else
string_type_ = L"::std::basic_string< " + char_type + L" >";
+ // Default encoding.
+ //
+ if (!char_encoding)
+ {
+ if (char_type == L"char")
+ char_encoding = L"utf8";
+ else
+ char_encoding = L"auto";
+ }
+
// Default mapping.
//
nsr_mapping_.push_back (
@@ -615,6 +632,121 @@ namespace CXX
return r;
}
+ String
+ strlit_ascii (String const& str)
+ {
+ String r;
+ Size n (str.size ());
+
+ // In most common cases we will have that many chars.
+ //
+ r.reserve (n + 2);
+
+ r += '"';
+
+ Boolean escape (false);
+
+ for (Size i (0); i < n; ++i)
+ {
+ UnsignedLong u (Context::unicode_char (str, i)); // May advance i.
+
+ // [128 - ] - unrepresentable
+ // 127 - \x7F
+ // [32 - 126] - as is
+ // [0 - 31] - \X or \xXX
+ //
+
+ if (u < 32 || u == 127)
+ {
+ switch (u)
+ {
+ case L'\n':
+ {
+ r += L"\\n";
+ break;
+ }
+ case L'\t':
+ {
+ r += L"\\t";
+ break;
+ }
+ case L'\v':
+ {
+ r += L"\\v";
+ break;
+ }
+ case L'\b':
+ {
+ r += L"\\b";
+ break;
+ }
+ case L'\r':
+ {
+ r += L"\\r";
+ break;
+ }
+ case L'\f':
+ {
+ r += L"\\f";
+ break;
+ }
+ case L'\a':
+ {
+ r += L"\\a";
+ break;
+ }
+ default:
+ {
+ r += charlit (u);
+ escape = true;
+ break;
+ }
+ }
+ }
+ else if (u < 127)
+ {
+ if (escape)
+ {
+ // Close and open the string so there are no clashes.
+ //
+ r += '"';
+ r += '"';
+
+ escape = false;
+ }
+
+ switch (u)
+ {
+ case L'"':
+ {
+ r += L"\\\"";
+ break;
+ }
+ case L'\\':
+ {
+ r += L"\\\\";
+ break;
+ }
+ default:
+ {
+ r += static_cast<WideChar> (u);
+ break;
+ }
+ }
+ }
+ else
+ {
+ // Unrepresentable character.
+ //
+ throw UnrepresentableCharacter (str, i + 1);
+ }
+ }
+
+ r += '"';
+
+ return r;
+ }
+
const UnsignedLong utf8_first_char_mask[5] =
{
0x00, 0x00, 0xC0, 0xE0, 0xF0
@@ -770,6 +902,126 @@ namespace CXX
}
String
+ strlit_iso8859_1 (String const& str)
+ {
+ String r;
+ Size n (str.size ());
+
+ // In most common cases we will have that many chars.
+ //
+ r.reserve (n + 2);
+
+ r += '"';
+
+ Boolean escape (false);
+
+ for (Size i (0); i < n; ++i)
+ {
+ UnsignedLong u (Context::unicode_char (str, i)); // May advance i.
+
+ // [256 - ] - unrepresentable
+ // [127 - 255] - \xXX
+ // [32 - 126] - as is
+ // [0 - 31] - \X or \xXX
+ //
+
+ if (u < 32)
+ {
+ switch (u)
+ {
+ case L'\n':
+ {
+ r += L"\\n";
+ break;
+ }
+ case L'\t':
+ {
+ r += L"\\t";
+ break;
+ }
+ case L'\v':
+ {
+ r += L"\\v";
+ break;
+ }
+ case L'\b':
+ {
+ r += L"\\b";
+ break;
+ }
+ case L'\r':
+ {
+ r += L"\\r";
+ break;
+ }
+ case L'\f':
+ {
+ r += L"\\f";
+ break;
+ }
+ case L'\a':
+ {
+ r += L"\\a";
+ break;
+ }
+ default:
+ {
+ r += charlit (u);
+ escape = true;
+ break;
+ }
+ }
+ }
+ else if (u < 127)
+ {
+ if (escape)
+ {
+ // Close and open the string so there are no clashes.
+ //
+ r += '"';
+ r += '"';
+
+ escape = false;
+ }
+
+ switch (u)
+ {
+ case L'"':
+ {
+ r += L"\\\"";
+ break;
+ }
+ case L'\\':
+ {
+ r += L"\\\\";
+ break;
+ }
+ default:
+ {
+ r += static_cast<WideChar> (u);
+ break;
+ }
+ }
+ }
+ else if (u < 256)
+ {
+ r += charlit (u);
+ escape = true;
+ }
+ else
+ {
+ // Unrepresentable character.
+ //
+ throw UnrepresentableCharacter (str, i + 1);
+ }
+ }
+
+ r += '"';
+
+ return r;
+ }
+
+ String
strlit_utf32 (String const& str)
{
String r;
@@ -886,8 +1138,27 @@ namespace CXX
String Context::
strlit (String const& str)
{
+ // First see if we have a custom mapping.
+ //
+ assert (string_literal_map != 0);
+ StringLiteralMap::ConstIterator i (string_literal_map->find (str));
+
+ if (i != string_literal_map->end ())
+ return i->second;
+
if (char_type == L"char")
- return strlit_utf8 (str);
+ {
+ if (char_encoding == L"utf8")
+ return strlit_utf8 (str);
+ else if (char_encoding == L"iso8859-1")
+ return strlit_iso8859_1 (str);
+ else
+ {
+ // For LCP, custom, and other unknown encodings, use ASCII.
+ //
+ return strlit_ascii (str);
+ }
+ }
else
return strlit_utf32 (str);
}
diff --git a/xsd/cxx/elements.hxx b/xsd/cxx/elements.hxx
index 39eee77..3bbacd0 100644
--- a/xsd/cxx/elements.hxx
+++ b/xsd/cxx/elements.hxx
@@ -6,6 +6,8 @@
#ifndef CXX_ELEMENTS_HXX
#define CXX_ELEMENTS_HXX
+#include <ostream>
+
#include <cult/types.hxx>
#include <cult/containers/set.hxx>
#include <cult/containers/map.hxx>
@@ -17,8 +19,7 @@
#include <xsd-frontend/traversal.hxx>
#include <elements.hxx>
-
-#include <ostream>
+#include <cxx/literal-map.hxx>
namespace CXX
{
@@ -36,6 +37,30 @@ namespace CXX
// Exceptions.
//
+ struct UnrepresentableCharacter
+ {
+ UnrepresentableCharacter (String const& str, Size pos)
+ : str_ (str), pos_ (pos)
+ {
+ }
+
+ String const&
+ string () const
+ {
+ return str_;
+ }
+
+ Size
+ position () const
+ {
+ return pos_;
+ }
+
+ private:
+ String str_;
+ Size pos_;
+ };
+
struct NoNamespaceMapping
{
NoNamespaceMapping (SemanticGraph::Path const& file,
@@ -106,7 +131,6 @@ namespace CXX
String reason_;
};
-
//
//
class Context
@@ -124,7 +148,9 @@ namespace CXX
public:
Context (std::wostream& o,
SemanticGraph::Schema& root,
+ StringLiteralMap const* custom_literals_map,
NarrowString const& char_type__,
+ NarrowString const& char_encoding__,
Boolean include_with_brackets__,
NarrowString const& include_prefix__,
NarrowString const& esymbol,
@@ -141,8 +167,10 @@ namespace CXX
: os (c.os),
schema_root (c.schema_root),
char_type (c.char_type),
+ char_encoding (c.char_encoding),
L (c.L),
string_type (c.string_type),
+ string_literal_map (c.string_literal_map),
include_with_brackets (c.include_with_brackets),
include_prefix (c.include_prefix),
type_exp (c.type_exp),
@@ -166,8 +194,10 @@ namespace CXX
: os (o),
schema_root (c.schema_root),
char_type (c.char_type),
+ char_encoding (c.char_encoding),
L (c.L),
string_type (c.string_type),
+ string_literal_map (c.string_literal_map),
include_with_brackets (c.include_with_brackets),
include_prefix (c.include_prefix),
type_exp (c.type_exp),
@@ -309,8 +339,10 @@ namespace CXX
SemanticGraph::Schema& schema_root;
String& char_type;
+ String& char_encoding;
String& L; // string literal prefix
String& string_type;
+ StringLiteralMap const* string_literal_map;
Boolean& include_with_brackets;
String& include_prefix;
@@ -326,6 +358,7 @@ namespace CXX
SemanticGraph::Namespace* xs_ns_;
String char_type_;
+ String char_encoding_;
String L_;
String string_type_;
diff --git a/xsd/cxx/literal-map.cxx b/xsd/cxx/literal-map.cxx
new file mode 100644
index 0000000..f3f7ee0
--- /dev/null
+++ b/xsd/cxx/literal-map.cxx
@@ -0,0 +1,296 @@
+// file : xsd/cxx/literal-map.cxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#include <memory> // std::auto_ptr
+#include <cstddef> // std::size_t
+#include <fstream>
+#include <iostream>
+
+#include <xercesc/util/XMLUni.hpp>
+#include <xercesc/util/XercesVersion.hpp>
+
+#include <xercesc/framework/LocalFileInputSource.hpp>
+
+#include <xercesc/sax/Locator.hpp>
+#include <xercesc/sax/SAXParseException.hpp>
+#include <xercesc/sax2/DefaultHandler.hpp>
+#include <xercesc/sax2/SAX2XMLReader.hpp>
+#include <xercesc/sax2/XMLReaderFactory.hpp>
+
+#include <xsd-frontend/xml.hxx>
+
+#include <cxx/literal-map.hxx>
+
+using namespace std;
+using namespace xercesc;
+namespace XML = XSDFrontend::XML;
+
+namespace CXX
+{
+ class Handler: public DefaultHandler
+ {
+ public:
+ struct Failed {};
+
+ Handler (String const& file, StringLiteralMap& map) // 'file' names the input in diagnostics; parsed entries are stored in 'map'.
+ : locator_ (0), state_ (s_init), file_ (file), map_ (map), // Null locator: line()/col() test it before use and report 0.
+ str_seen_ (false), lit_seen_ (false) // No 'string'/'literal' element seen yet.
+ {}
+
+ virtual void
+ setDocumentLocator (const Locator* const l)
+ {
+ locator_ = l;
+ }
+
+ virtual Void
+ startElement (const XMLCh* const,
+ const XMLCh* const lname,
+ const XMLCh* const,
+ const xercesc::Attributes&)
+ {
+ String n (XML::transcode (lname));
+
+ if (n == L"string-literal-map" && state_ == s_init)
+ state_ = s_map;
+ else if (n == L"entry" && state_ == s_map)
+ {
+ str_seen_ = false;
+ lit_seen_ = false;
+ state_ = s_entry;
+ }
+ else if (n == L"string" && state_ == s_entry)
+ {
+ str_seen_ = true;
+ str_.clear ();
+ state_ = s_string;
+ }
+ else if (n == L"literal" && state_ == s_entry)
+ {
+ lit_seen_ = true;
+ lit_.clear ();
+ state_ = s_literal;
+ }
+ else
+ {
+ wcerr << file_ << ":" << line () << ":" << col () << ": error: "
+ << "unexpected element '" << n << "'" << endl;
+ throw Failed ();
+ }
+ }
+
+ virtual Void
+ endElement (const XMLCh* const,
+ const XMLCh* const lname,
+ const XMLCh* const)
+ {
+ String n (XML::transcode (lname));
+
+ if (n == L"string-literal-map")
+ state_ = s_init;
+ else if (n == L"entry")
+ {
+ if (!str_seen_)
+ {
+ wcerr << file_ << ":" << line () << ":" << col () << ": error: "
+ << "expected 'string' element" << endl;
+ throw Failed ();
+ }
+
+ if (!lit_seen_)
+ {
+ wcerr << file_ << ":" << line () << ":" << col () << ": error: "
+ << "expected 'literal' element" << endl;
+ throw Failed ();
+ }
+
+ map_[str_] = lit_;
+ state_ = s_map;
+ }
+ else if (n == L"string")
+ state_ = s_entry;
+ else if (n == L"literal")
+ state_ = s_entry;
+ }
+
+#if _XERCES_VERSION >= 30000
+ virtual Void
+ characters (const XMLCh* const s, const XMLSize_t length)
+#else
+ virtual Void
+ characters (const XMLCh* const s, const unsigned int length)
+#endif
+ {
+ String str (XML::transcode (s, length));
+
+ if (state_ == s_string)
+ str_ += str;
+ else if (state_ == s_literal)
+ lit_ += str;
+ else
+ {
+ for (Size i (0); i < str.size (); ++i)
+ {
+ WideChar c (str[i]);
+
+ if (c != 0x20 && c != 0x0A && c != 0x0D && c != 0x09)
+ {
+ wcerr << file_ << ":" << line () << ":" << col () << ": error: "
+ << "unexpected character data" << endl;
+ throw Failed ();
+ }
+ }
+ }
+ }
+
+ // Error handling.
+ //
+ enum Severity {s_warning, s_error, s_fatal};
+
+ virtual Void
+ warning (const SAXParseException& e)
+ {
+ handle (e, s_warning);
+ }
+
+ virtual Void
+ error (const SAXParseException& e)
+ {
+ handle (e, s_error);
+ }
+
+ virtual Void
+ fatalError (const SAXParseException& e)
+ {
+ handle (e, s_fatal);
+ }
+
+ virtual Void
+ resetErrors ()
+ {
+ }
+
+ Void
+ handle (const SAXParseException& e, Severity s)
+ {
+ wcerr << file_ << ":";
+
+#if _XERCES_VERSION >= 30000
+ wcerr << e.getLineNumber () << ":" << e.getColumnNumber () << ": ";
+#else
+ XMLSSize_t l (e.getLineNumber ());
+ XMLSSize_t c (e.getColumnNumber ());
+ wcerr << (l == -1 ? 0 : l) << ":" << (c == -1 ? 0 : c) << ": ";
+#endif
+
+ String msg (XML::transcode (e.getMessage ()));
+ wcerr << (s == s_warning ? "warning: " : "error: ") << msg << endl;
+
+ if (s != s_warning)
+ throw Failed ();
+ }
+
+ size_t
+ line () const
+ {
+ size_t r (0);
+
+ if (locator_ != 0)
+ {
+#if _XERCES_VERSION >= 30000
+ r = static_cast<size_t> (locator_->getLineNumber ());
+#else
+ XMLSSize_t l (locator_->getLineNumber ());
+ r = l == -1 ? 0 : static_cast<size_t> (l);
+#endif
+ }
+
+ return r;
+ }
+
+ size_t
+ col () const
+ {
+ size_t r (0);
+
+ if (locator_ != 0)
+ {
+#if _XERCES_VERSION >= 30000
+ r = static_cast<size_t> (locator_->getColumnNumber ());
+#else
+ XMLSSize_t c (locator_->getColumnNumber ());
+ r = c == -1 ? 0 : static_cast<size_t> (c);
+#endif
+ }
+
+ return r;
+ }
+
+ private:
+ const Locator* locator_;
+
+ enum
+ {
+ s_init,
+ s_map,
+ s_entry,
+ s_string,
+ s_literal
+ } state_;
+
+ String file_;
+ StringLiteralMap& map_;
+
+ Boolean str_seen_;
+ Boolean lit_seen_;
+
+ String str_;
+ String lit_;
+ };
+
+ bool
+ read_literal_map (NarrowString const& file, StringLiteralMap& map)
+ {
+ try
+ {
+ // Try to open the file with fstream. This way we get to
+ // report the error in a consistent manner.
+ //
+ {
+ ifstream ifs (file.c_str ());
+ if (!ifs.is_open ())
+ {
+ wcerr << file.c_str () << ": unable to open in read mode" << endl;
+ return false;
+ }
+ }
+
+ String wfile (file);
+
+ LocalFileInputSource is (XML::XMLChString (wfile).c_str ());
+ Handler h (wfile, map);
+
+ auto_ptr<SAX2XMLReader> parser (
+ XMLReaderFactory::createXMLReader ());
+
+ parser->setFeature (XMLUni::fgSAX2CoreNameSpaces, true);
+ parser->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true);
+ parser->setFeature (XMLUni::fgSAX2CoreValidation, false);
+ parser->setFeature (XMLUni::fgXercesSchema, false);
+ parser->setFeature (XMLUni::fgXercesSchemaFullChecking, false);
+
+ parser->setErrorHandler (&h);
+ parser->setContentHandler (&h);
+
+ parser->parse (is);
+ }
+ catch (Handler::Failed const&)
+ {
+ return false;
+ }
+
+ return true;
+ }
+}
diff --git a/xsd/cxx/literal-map.hxx b/xsd/cxx/literal-map.hxx
new file mode 100644
index 0000000..1120045
--- /dev/null
+++ b/xsd/cxx/literal-map.hxx
@@ -0,0 +1,23 @@
+// file : xsd/cxx/literal-map.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#ifndef CXX_LITERAL_MAP_HXX
+#define CXX_LITERAL_MAP_HXX
+
+#include <cult/types.hxx>
+#include <cult/containers/map.hxx>
+
+namespace CXX
+{
+ using namespace Cult;
+ typedef WideString String;
+
+ typedef Cult::Containers::Map<String, String> StringLiteralMap;
+
+ bool
+ read_literal_map (NarrowString const& file, StringLiteralMap& map);
+}
+
+#endif // CXX_LITERAL_MAP_HXX
diff --git a/xsd/cxx/parser/cli.hxx b/xsd/cxx/parser/cli.hxx
index 504de43..5f31af7 100644
--- a/xsd/cxx/parser/cli.hxx
+++ b/xsd/cxx/parser/cli.hxx
@@ -24,6 +24,7 @@ namespace CXX
typedef Char const Key[];
extern Key type_map;
+ extern Key char_encoding;
extern Key char_type;
extern Key output_dir;
extern Key xml_parser;
@@ -85,6 +86,7 @@ namespace CXX
typedef Cult::CLI::Options<
type_map, Cult::Containers::Vector<NarrowString>,
char_type, NarrowString,
+ char_encoding, NarrowString,
output_dir, NarrowString,
xml_parser, NarrowString,
generate_inline, Boolean,
diff --git a/xsd/cxx/parser/elements.cxx b/xsd/cxx/parser/elements.cxx
index 8a02ffb..09d1008 100644
--- a/xsd/cxx/parser/elements.cxx
+++ b/xsd/cxx/parser/elements.cxx
@@ -42,12 +42,15 @@ namespace CXX
Context (std::wostream& o,
SemanticGraph::Schema& root,
CLI::Options const& ops,
+ StringLiteralMap const* map,
Regex const* he,
Regex const* ie,
Regex const* hie)
: CXX::Context (o,
root,
+ map,
ops.value<CLI::char_type> (),
+ ops.value<CLI::char_encoding> (),
ops.value<CLI::include_with_brackets> (),
ops.value<CLI::include_prefix> (),
ops.value<CLI::export_symbol> (),
diff --git a/xsd/cxx/parser/elements.hxx b/xsd/cxx/parser/elements.hxx
index 90ff84e..61cde69 100644
--- a/xsd/cxx/parser/elements.hxx
+++ b/xsd/cxx/parser/elements.hxx
@@ -39,6 +39,7 @@ namespace CXX
Context (std::wostream&,
SemanticGraph::Schema&,
CLI::Options const&,
+ StringLiteralMap const*,
Regex const* hxx_expr,
Regex const* ixx_expr,
Regex const* hxx_impl_expr);
diff --git a/xsd/cxx/parser/generator.cxx b/xsd/cxx/parser/generator.cxx
index 342e3f2..ec08af4 100644
--- a/xsd/cxx/parser/generator.cxx
+++ b/xsd/cxx/parser/generator.cxx
@@ -126,9 +126,9 @@ namespace CXX
{
namespace CLI
{
- extern Key char_type;
extern Key type_map = "type-map";
extern Key char_type = "char-type";
+ extern Key char_encoding = "char-encoding";
extern Key output_dir = "output-dir";
extern Key xml_parser = "xml-parser";
extern Key generate_inline = "generate-inline";
@@ -206,6 +206,14 @@ namespace CXX
<< " values are 'char' (default) and 'wchar_t'."
<< endl;
+ e << "--char-encoding <enc>" << endl
+ << " Specify the character encoding that should be used\n"
+ << " in the object model. Valid values for the 'char'\n"
+ << " character type are 'utf8' (default), 'iso8859-1',\n"
+ << " 'lcp', and 'custom'. For the 'wchar_t' character\n"
+ << " type the only valid value is 'auto'."
+ << endl;
+
e << "--output-dir <dir>" << endl
<< " Write generated files to <dir> instead of current\n"
<< " directory."
@@ -471,6 +479,11 @@ namespace CXX
// Misc.
//
+ e << "--custom-literals <file>" << endl
+ << " Load custom XML string to C++ literal mappings\n"
+ << " from <file>."
+ << endl;
+
e << "--export-symbol <symbol>" << endl
<< " Export symbol for Win32 DLL export/import control."
<< endl;
@@ -600,6 +613,7 @@ namespace CXX
generate (Parser::CLI::Options const& ops,
Schema& schema,
Path const& file_path,
+ StringLiteralMap const& string_literal_map,
Boolean gen_driver,
const WarningSet& disabled_warnings,
FileList& file_list,
@@ -648,7 +662,7 @@ namespace CXX
//
{
NameProcessor proc;
- proc.process (ops, schema, file_path);
+ proc.process (ops, schema, file_path, string_literal_map);
}
Boolean validation ((ops.value<CLI::xml_parser> () == "expat" ||
@@ -701,7 +715,7 @@ namespace CXX
String xns;
{
- Context ctx (std::wcerr, schema, ops, 0, 0, 0);
+ Context ctx (std::wcerr, schema, ops, 0, 0, 0, 0);
xns = ctx.xs_ns_name ();
}
@@ -1144,7 +1158,13 @@ namespace CXX
// HXX
//
{
- Context ctx (hxx, schema, ops, &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (hxx,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
Indentation::Clip<Indentation::SLOC, WideChar> hxx_sloc (hxx);
@@ -1231,7 +1251,13 @@ namespace CXX
//
if (inline_)
{
- Context ctx (ixx, schema, ops, &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (ixx,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
Indentation::Clip<Indentation::SLOC, WideChar> ixx_sloc (ixx);
@@ -1287,7 +1313,13 @@ namespace CXX
//
if (source)
{
- Context ctx (cxx, schema, ops, &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (cxx,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
Indentation::Clip<Indentation::SLOC, WideChar> cxx_sloc (cxx);
@@ -1351,8 +1383,13 @@ namespace CXX
//
if (impl)
{
- Context ctx (hxx_impl, schema, ops,
- &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (hxx_impl,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
String guard (guard_expr.merge (guard_prefix + hxx_impl_name));
guard = ctx.escape (guard); // Make it a C++ id.
@@ -1380,8 +1417,13 @@ namespace CXX
//
if (impl)
{
- Context ctx (cxx_impl, schema, ops,
- &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (cxx_impl,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
// Set auto-indentation.
//
@@ -1397,8 +1439,13 @@ namespace CXX
//
if (driver)
{
- Context ctx (cxx_driver, schema, ops,
- &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (cxx_driver,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
// Set auto-indentation.
//
@@ -1412,6 +1459,17 @@ namespace CXX
return sloc;
}
+ catch (UnrepresentableCharacter const& e)
+ {
+ wcerr << "error: character at position " << e.position () << " "
+ << "in string '" << e.string () << "' is unrepresentable in "
+ << "the target encoding" << endl;
+
+ wcerr << "info: use the --custom-literals option to provide custom "
+ << "string literals mapping" << endl;
+
+ throw Failed ();
+ }
catch (NoNamespaceMapping const& e)
{
wcerr << e.file () << ":" << e.line () << ":" << e.column ()
diff --git a/xsd/cxx/parser/generator.hxx b/xsd/cxx/parser/generator.hxx
index aaab3b8..8c5631d 100644
--- a/xsd/cxx/parser/generator.hxx
+++ b/xsd/cxx/parser/generator.hxx
@@ -18,6 +18,7 @@
#include <xsd.hxx>
+#include <cxx/literal-map.hxx>
#include <cxx/parser/cli.hxx>
namespace CXX
@@ -41,6 +42,7 @@ namespace CXX
generate (CLI::Options const& options,
XSDFrontend::SemanticGraph::Schema&,
XSDFrontend::SemanticGraph::Path const& file,
+ StringLiteralMap const&,
Boolean gen_driver,
const WarningSet& disabled_warnings,
FileList& file_list,
diff --git a/xsd/cxx/parser/name-processor.cxx b/xsd/cxx/parser/name-processor.cxx
index e9ba876..5f9209e 100644
--- a/xsd/cxx/parser/name-processor.cxx
+++ b/xsd/cxx/parser/name-processor.cxx
@@ -3,7 +3,6 @@
// copyright : Copyright (c) 2006-2009 Code Synthesis Tools CC
// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
-#include <cxx/elements.hxx>
#include <cxx/parser/name-processor.hxx>
#include <xsd-frontend/semantic-graph.hxx>
@@ -35,10 +34,13 @@ namespace CXX
public:
Context (CLI::Options const& ops,
SemanticGraph::Schema& root,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const* map)
: CXX::Context (std::wcerr,
root,
+ map,
ops.value<CLI::char_type> (),
+ ops.value<CLI::char_encoding> (),
ops.value<CLI::include_with_brackets> (),
ops.value<CLI::include_prefix> (),
ops.value<CLI::export_symbol> (),
@@ -1101,9 +1103,10 @@ namespace CXX
Void
process_impl (CLI::Options const& ops,
SemanticGraph::Schema& tu,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const& map)
{
- Context ctx (ops, tu, file);
+ Context ctx (ops, tu, file, &map);
if (tu.names_begin ()->named ().name () ==
L"http://www.w3.org/2001/XMLSchema")
@@ -1196,9 +1199,10 @@ namespace CXX
Void NameProcessor::
process (CLI::Options const& ops,
SemanticGraph::Schema& tu,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const& map)
{
- process_impl (ops, tu, file);
+ process_impl (ops, tu, file, map);
}
}
}
diff --git a/xsd/cxx/parser/name-processor.hxx b/xsd/cxx/parser/name-processor.hxx
index f7849c8..fee7027 100644
--- a/xsd/cxx/parser/name-processor.hxx
+++ b/xsd/cxx/parser/name-processor.hxx
@@ -6,10 +6,9 @@
#ifndef CXX_PARSER_NAME_PROCESSOR_HXX
#define CXX_PARSER_NAME_PROCESSOR_HXX
-#include <cult/types.hxx>
-
#include <xsd-frontend/semantic-graph.hxx>
+#include <cxx/elements.hxx>
#include <cxx/parser/cli.hxx>
namespace CXX
@@ -26,7 +25,8 @@ namespace CXX
Void
process (CLI::Options const& ops,
XSDFrontend::SemanticGraph::Schema&,
- XSDFrontend::SemanticGraph::Path const& file);
+ XSDFrontend::SemanticGraph::Path const& file,
+ StringLiteralMap const& map);
};
}
}
diff --git a/xsd/cxx/parser/parser-header.cxx b/xsd/cxx/parser/parser-header.cxx
index 878a891..8ecd898 100644
--- a/xsd/cxx/parser/parser-header.cxx
+++ b/xsd/cxx/parser/parser-header.cxx
@@ -1324,6 +1324,13 @@ namespace CXX
}
else
{
+ if (ctx.char_type == L"char" &&
+ ctx.xml_parser == L"xerces" &&
+ ctx.char_encoding != L"custom")
+ {
+ ctx.os << "#include <xsd/cxx/xml/char-" << ctx.char_encoding << ".hxx>" << endl;
+ }
+
ctx.os << "#include <xsd/cxx/xml/error-handler.hxx>" << endl
<< "#include <xsd/cxx/parser/exceptions.hxx>" << endl
<< "#include <xsd/cxx/parser/elements.hxx>" << endl
diff --git a/xsd/cxx/parser/validator.cxx b/xsd/cxx/parser/validator.cxx
index 526c941..9b5d967 100644
--- a/xsd/cxx/parser/validator.cxx
+++ b/xsd/cxx/parser/validator.cxx
@@ -27,7 +27,7 @@ namespace CXX
CLI::Options const& options,
const WarningSet& disabled_warnings,
Boolean& valid_)
- : Context (std::wcerr, root, options, 0, 0, 0),
+ : Context (std::wcerr, root, options, 0, 0, 0, 0),
disabled_warnings_ (disabled_warnings),
disabled_warnings_all_ (false),
valid (valid_),
@@ -584,7 +584,20 @@ namespace CXX
if (options.value<CLI::xml_parser> () == "expat" &&
options.value<CLI::char_type> () == "wchar_t")
{
- wcerr << "error: using expat with wchar_t is not yet supported"
+ wcerr << "error: using expat with wchar_t is not supported"
+ << endl;
+
+ return false;
+ }
+
+ // With expat, only the utf8 character encoding (or none specified) is accepted.
+ //
+ if (options.value<CLI::xml_parser> () == "expat" &&
+ !options.value<CLI::char_encoding> ().empty () &&
+ options.value<CLI::char_encoding> () != "utf8")
+ {
+ wcerr << "error: using expat with character encoding other than "
+ << "utf8 is not supported"
<< endl;
return false;
diff --git a/xsd/cxx/tree/cli.hxx b/xsd/cxx/tree/cli.hxx
index 9ccf405..c9078e7 100644
--- a/xsd/cxx/tree/cli.hxx
+++ b/xsd/cxx/tree/cli.hxx
@@ -24,6 +24,7 @@ namespace CXX
typedef Char const Key[];
extern Key char_type;
+ extern Key char_encoding;
extern Key output_dir;
extern Key generate_polymorphic;
extern Key generate_serialization;
@@ -119,6 +120,7 @@ namespace CXX
typedef Cult::CLI::Options<
char_type, NarrowString,
+ char_encoding, NarrowString,
output_dir, NarrowString,
generate_polymorphic, Boolean,
generate_serialization, Boolean,
diff --git a/xsd/cxx/tree/counter.cxx b/xsd/cxx/tree/counter.cxx
index d8223bb..a9649b5 100644
--- a/xsd/cxx/tree/counter.cxx
+++ b/xsd/cxx/tree/counter.cxx
@@ -239,7 +239,7 @@ namespace CXX
count (CLI::Options const& options, SemanticGraph::Schema& tu)
{
Counts counts;
- Context ctx (std::wcerr, tu, options, counts, false, 0, 0, 0);
+ Context ctx (std::wcerr, tu, options, counts, false, 0, 0, 0, 0);
Traversal::Schema schema;
Traversal::Sources sources;
diff --git a/xsd/cxx/tree/elements.cxx b/xsd/cxx/tree/elements.cxx
index db1d858..444caa4 100644
--- a/xsd/cxx/tree/elements.cxx
+++ b/xsd/cxx/tree/elements.cxx
@@ -39,12 +39,15 @@ namespace CXX
CLI::Options const& ops,
Counts const& counts_,
Boolean generate_xml_schema__,
+ StringLiteralMap const* map,
Regex const* fe,
Regex const* he,
Regex const* ie)
: CXX::Context (o,
root,
+ map,
ops.value<CLI::char_type> (),
+ ops.value<CLI::char_encoding> (),
ops.value<CLI::include_with_brackets> (),
ops.value<CLI::include_prefix> (),
ops.value<CLI::export_symbol> (),
diff --git a/xsd/cxx/tree/elements.hxx b/xsd/cxx/tree/elements.hxx
index 602291d..a0cb1d9 100644
--- a/xsd/cxx/tree/elements.hxx
+++ b/xsd/cxx/tree/elements.hxx
@@ -117,6 +117,7 @@ namespace CXX
CLI::Options const& ops,
Counts const& counts_,
Boolean generate_xml_schema,
+ StringLiteralMap const*,
Regex const* fwd_expr,
Regex const* hxx_expr,
Regex const* ixx_expr);
diff --git a/xsd/cxx/tree/generator.cxx b/xsd/cxx/tree/generator.cxx
index f9b055e..b81504c 100644
--- a/xsd/cxx/tree/generator.cxx
+++ b/xsd/cxx/tree/generator.cxx
@@ -116,6 +116,7 @@ namespace CXX
namespace CLI
{
extern Key char_type = "char-type";
+ extern Key char_encoding = "char-encoding";
extern Key output_dir = "output-dir";
extern Key generate_polymorphic = "generate-polymorphic";
extern Key generate_serialization = "generate-serialization";
@@ -220,12 +221,19 @@ namespace CXX
<< " values are 'char' (default) and 'wchar_t'."
<< endl;
+ e << "--char-encoding <enc>" << endl
+ << " Specify the character encoding that should be used\n"
+ << " in the object model. Valid values for the 'char'\n"
+ << " character type are 'utf8' (default), 'iso8859-1',\n"
+ << " 'lcp', and 'custom'. For the 'wchar_t' character\n"
+ << " type the only valid value is 'auto'."
+ << endl;
+
e << "--output-dir <dir>" << endl
<< " Write generated files to <dir> instead of current\n"
<< " directory."
<< endl;
-
e << "--generate-polymorphic" << endl
<< " Generate polymorphism-aware code. Specify this\n"
<< " option if you use substitution groups or xsi:type."
@@ -670,6 +678,11 @@ namespace CXX
<< " separate the file name from the part number."
<< endl;
+ e << "--custom-literals <file>" << endl
+ << " Load custom XML string to C++ literal mappings\n"
+ << " from <file>."
+ << endl;
+
e << "--export-symbol <symbol>" << endl
<< " Export symbol for Win32 DLL export/import control."
<< endl;
@@ -803,6 +816,7 @@ namespace CXX
generate (Tree::CLI::Options const& ops,
Schema& schema,
Path const& file_path,
+ StringLiteralMap const& string_literal_map,
const WarningSet& disabled_warnings,
FileList& file_list,
AutoUnlinks& unlinks)
@@ -860,7 +874,7 @@ namespace CXX
//
{
NameProcessor proc;
- if (!proc.process (ops, schema, file_path))
+ if (!proc.process (ops, schema, file_path, string_literal_map))
throw Failed ();
}
@@ -1179,8 +1193,15 @@ namespace CXX
//
if (forward)
{
- Context ctx (fwd, schema, ops, counts, generate_xml_schema,
- &fwd_expr, &hxx_expr, &ixx_expr);
+ Context ctx (fwd,
+ schema,
+ ops,
+ counts,
+ generate_xml_schema,
+ &string_literal_map,
+ &fwd_expr,
+ &hxx_expr,
+ &ixx_expr);
Indentation::Clip<Indentation::SLOC, WideChar> fwd_sloc (fwd);
@@ -1287,8 +1308,15 @@ namespace CXX
// HXX
//
{
- Context ctx (hxx, schema, ops, counts, generate_xml_schema,
- &fwd_expr, &hxx_expr, &ixx_expr);
+ Context ctx (hxx,
+ schema,
+ ops,
+ counts,
+ generate_xml_schema,
+ &string_literal_map,
+ &fwd_expr,
+ &hxx_expr,
+ &ixx_expr);
Indentation::Clip<Indentation::SLOC, WideChar> hxx_sloc (hxx);
@@ -1434,8 +1462,15 @@ namespace CXX
//
if (inline_)
{
- Context ctx (ixx, schema, ops, counts, generate_xml_schema,
- &fwd_expr, &hxx_expr, &ixx_expr);
+ Context ctx (ixx,
+ schema,
+ ops,
+ counts,
+ generate_xml_schema,
+ &string_literal_map,
+ &fwd_expr,
+ &hxx_expr,
+ &ixx_expr);
Indentation::Clip<Indentation::SLOC, WideChar> ixx_sloc (ixx);
@@ -1560,8 +1595,15 @@ namespace CXX
WideOutputFileStream& os (*cxx[part]);
- Context ctx (os, schema, ops, counts, generate_xml_schema,
- &fwd_expr, &hxx_expr, &ixx_expr);
+ Context ctx (os,
+ schema,
+ ops,
+ counts,
+ generate_xml_schema,
+ &string_literal_map,
+ &fwd_expr,
+ &hxx_expr,
+ &ixx_expr);
Indentation::Clip<Indentation::SLOC, WideChar> cxx_sloc (os);
@@ -1644,6 +1686,17 @@ namespace CXX
return sloc;
}
+ catch (UnrepresentableCharacter const& e)
+ {
+ wcerr << "error: character at position " << e.position () << " "
+ << "in string '" << e.string () << "' is unrepresentable in "
+ << "the target encoding" << endl;
+
+ wcerr << "info: use the --custom-literals option to provide custom "
+ << "string literals mapping" << endl;
+
+ throw Failed ();
+ }
catch (NoNamespaceMapping const& e)
{
wcerr << e.file () << ":" << e.line () << ":" << e.column ()
diff --git a/xsd/cxx/tree/generator.hxx b/xsd/cxx/tree/generator.hxx
index 1aa3c60..a66ede0 100644
--- a/xsd/cxx/tree/generator.hxx
+++ b/xsd/cxx/tree/generator.hxx
@@ -13,6 +13,7 @@
#include <xsd.hxx>
+#include <cxx/literal-map.hxx>
#include <cxx/tree/cli.hxx>
namespace CXX
@@ -36,6 +37,7 @@ namespace CXX
generate (CLI::Options const& options,
XSDFrontend::SemanticGraph::Schema&,
XSDFrontend::SemanticGraph::Path const& file,
+ StringLiteralMap const&,
const WarningSet& disabled_warnings,
FileList& file_list,
AutoUnlinks& unlinks);
diff --git a/xsd/cxx/tree/name-processor.cxx b/xsd/cxx/tree/name-processor.cxx
index 53027af..e15b072 100644
--- a/xsd/cxx/tree/name-processor.cxx
+++ b/xsd/cxx/tree/name-processor.cxx
@@ -4,7 +4,6 @@
// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
#include <cxx/tree/name-processor.hxx>
-#include <cxx/tree/elements.hxx>
#include <backend-elements/regex.hxx>
@@ -43,12 +42,14 @@ namespace CXX
Counts const& counts,
Boolean generate_xml_schema,
SemanticGraph::Schema& root,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const& map)
: Tree::Context (std::wcerr,
root,
options,
counts,
generate_xml_schema,
+ &map,
0,
0,
0),
@@ -1970,12 +1971,13 @@ namespace CXX
Boolean
process_impl (CLI::Options const& ops,
SemanticGraph::Schema& tu,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const& map)
{
try
{
Counts counts;
- Context ctx (ops, counts, false, tu, file);
+ Context ctx (ops, counts, false, tu, file, map);
if (tu.names_begin ()->named ().name () ==
L"http://www.w3.org/2001/XMLSchema")
@@ -2096,9 +2098,10 @@ namespace CXX
Boolean NameProcessor::
process (CLI::Options const& ops,
SemanticGraph::Schema& tu,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const& map)
{
- return process_impl (ops, tu, file);
+ return process_impl (ops, tu, file, map);
}
}
}
diff --git a/xsd/cxx/tree/name-processor.hxx b/xsd/cxx/tree/name-processor.hxx
index 9b8eac9..18c3b82 100644
--- a/xsd/cxx/tree/name-processor.hxx
+++ b/xsd/cxx/tree/name-processor.hxx
@@ -6,11 +6,7 @@
#ifndef CXX_TREE_NAME_PROCESSOR_HXX
#define CXX_TREE_NAME_PROCESSOR_HXX
-#include <cult/types.hxx>
-
-#include <xsd-frontend/semantic-graph.hxx>
-
-#include <cxx/tree/cli.hxx>
+#include <cxx/tree/elements.hxx>
namespace CXX
{
@@ -26,7 +22,8 @@ namespace CXX
Boolean
process (CLI::Options const&,
XSDFrontend::SemanticGraph::Schema&,
- XSDFrontend::SemanticGraph::Path const& file);
+ XSDFrontend::SemanticGraph::Path const& file,
+ StringLiteralMap const&);
};
}
}
diff --git a/xsd/cxx/tree/tree-forward.cxx b/xsd/cxx/tree/tree-forward.cxx
index cceedb7..02c4317 100644
--- a/xsd/cxx/tree/tree-forward.cxx
+++ b/xsd/cxx/tree/tree-forward.cxx
@@ -152,6 +152,13 @@ namespace CXX
}
else
{
+ if (ctx.char_type == L"char" && ctx.char_encoding != L"custom")
+ {
+ ctx.os << "#include <xsd/cxx/xml/char-" << ctx.char_encoding <<
+ ".hxx>" << endl
+ << endl;
+ }
+
ctx.os << "#include <xsd/cxx/tree/exceptions.hxx>" << endl
<< "#include <xsd/cxx/tree/elements.hxx>" << endl
<< "#include <xsd/cxx/tree/types.hxx>" << endl
diff --git a/xsd/cxx/tree/tree-header.cxx b/xsd/cxx/tree/tree-header.cxx
index 7bb630c..9b39739 100644
--- a/xsd/cxx/tree/tree-header.cxx
+++ b/xsd/cxx/tree/tree-header.cxx
@@ -3539,6 +3539,13 @@ namespace CXX
{
if (ctx.generate_xml_schema)
{
+ if (ctx.char_type == L"char" && ctx.char_encoding != L"custom")
+ {
+ ctx.os << "#include <xsd/cxx/xml/char-" << ctx.char_encoding <<
+ ".hxx>" << endl
+ << endl;
+ }
+
ctx.os << "#include <xsd/cxx/tree/exceptions.hxx>" << endl
<< "#include <xsd/cxx/tree/elements.hxx>" << endl
<< "#include <xsd/cxx/tree/types.hxx>" << endl
@@ -3671,6 +3678,13 @@ namespace CXX
<< "#include <algorithm> // std::binary_search" << endl
<< endl;
+ if (ctx.char_type == L"char" && ctx.char_encoding != L"custom")
+ {
+ ctx.os << "#include <xsd/cxx/xml/char-" << ctx.char_encoding <<
+ ".hxx>" << endl
+ << endl;
+ }
+
ctx.os << "#include <xsd/cxx/tree/exceptions.hxx>" << endl
<< "#include <xsd/cxx/tree/elements.hxx>" << endl
<< "#include <xsd/cxx/tree/containers.hxx>" << endl
diff --git a/xsd/cxx/tree/validator.cxx b/xsd/cxx/tree/validator.cxx
index 7ef23fa..5742e7a 100644
--- a/xsd/cxx/tree/validator.cxx
+++ b/xsd/cxx/tree/validator.cxx
@@ -38,6 +38,7 @@ namespace CXX
generate_xml_schema,
0,
0,
+ 0,
0),
disabled_warnings_ (disabled_warnings),
disabled_warnings_all_ (false),
diff --git a/xsd/elements.hxx b/xsd/elements.hxx
index bfde527..3948479 100644
--- a/xsd/elements.hxx
+++ b/xsd/elements.hxx
@@ -132,4 +132,3 @@ private:
};
#endif // ELEMENTS_HXX
-
diff --git a/xsd/makefile b/xsd/makefile
index e58b9dd..12990a6 100644
--- a/xsd/makefile
+++ b/xsd/makefile
@@ -7,7 +7,8 @@ include $(dir $(lastword $(MAKEFILE_LIST)))../build/bootstrap.make
cxx_tun := xsd.cxx
-cxx_tun += cxx/elements.cxx
+cxx_tun += cxx/elements.cxx \
+ cxx/literal-map.cxx
cxx_tun += cxx/parser/elements.cxx \
cxx/parser/validator.cxx \
@@ -88,17 +89,22 @@ $(call import,\
$(scf_root)/import/libxsd-frontend/stub.make,\
l: xsd_fe.l,cpp-options: xsd_fe.l.cpp-options)
+$(call import,\
+ $(scf_root)/import/libxerces-c/stub.make,\
+ l: xerces_c.l,cpp-options: xerces_c.l.cpp-options)
+
# Build.
#
$(xsd): $(cxx_obj) $(xsd_fe.l) $(be.l) $(cult.l) $(fs.l) $(re.l) $(xerces_c.l)
$(cxx_obj) $(cxx_od): cpp_options := -I$(src_base)
-$(cxx_obj) $(cxx_od): \
- $(xsd_fe.l.cpp-options) \
- $(be.l.cpp-options) \
- $(cult.l.cpp-options) \
- $(fs.l.cpp-options) \
- $(re.l.cpp-options)
+$(cxx_obj) $(cxx_od): \
+ $(xsd_fe.l.cpp-options) \
+ $(be.l.cpp-options) \
+ $(cult.l.cpp-options) \
+ $(fs.l.cpp-options) \
+ $(re.l.cpp-options) \
+ $(xerces_c.l.cpp-options)
$(call include-dep,$(cxx_od))
diff --git a/xsd/xsd.cxx b/xsd/xsd.cxx
index 7aa18e6..2a67ae9 100644
--- a/xsd/xsd.cxx
+++ b/xsd/xsd.cxx
@@ -34,6 +34,8 @@
#include <iostream>
#include <boost/filesystem/fstream.hpp>
+#include <xercesc/util/PlatformUtils.hpp>
+
#include <xsd.hxx>
#include <usage.hxx>
@@ -79,6 +81,7 @@ namespace CLI
extern Key location_map = "location-map";
extern Key location_regex = "location-regex";
extern Key location_regex_trace = "location-regex-trace";
+ extern Key custom_literals = "custom-literals";
extern Key file_per_type = "file-per-type";
extern Key type_file_regex = "type-file-regex";
extern Key type_file_regex_trace = "type-file-regex-trace";
@@ -101,6 +104,7 @@ namespace CLI
location_map, NarrowStrings,
location_regex, NarrowStrings,
location_regex_trace, Boolean,
+ custom_literals, NarrowString,
file_per_type, Boolean,
type_file_regex, NarrowStrings,
type_file_regex_trace, Boolean,
@@ -188,11 +192,27 @@ private:
Boolean trace_;
};
+// Calls XMLPlatformUtils::Initialize () on construction and Terminate () on destruction.
+//
+struct XercesInitializer
+{
+ XercesInitializer ()
+ {
+ xercesc::XMLPlatformUtils::Initialize ();
+ }
+
+ ~XercesInitializer ()
+ {
+ xercesc::XMLPlatformUtils::Terminate ();
+ }
+};
+
// Expand the \n escape sequence.
//
Void
expand_nl (NarrowString& s);
+
Int
main (Int argc, Char* argv[])
{
@@ -557,6 +577,22 @@ main (Int argc, Char* argv[])
common_ops.value<CLI::anonymous_regex> (),
common_ops.value<CLI::anonymous_regex_trace> ());
+ // Load custom string literals, if any.
+ //
+ CXX::StringLiteralMap string_literal_map;
+
+ if (NarrowString file = common_ops.value<CLI::custom_literals> ())
+ {
+ XercesInitializer xerces_init;
+
+ if (!CXX::read_literal_map (file, string_literal_map))
+ {
+ // Diagnostics have already been issued.
+ //
+ return 1;
+ }
+ }
+
if (!fpt)
{
// File-per-schema compilation mode.
@@ -703,7 +739,13 @@ main (Int argc, Char* argv[])
try
{
sloc += CXX::Tree::Generator::generate (
- *tree_ops, *schema, tu, disabled_w, file_list, unlinks);
+ *tree_ops,
+ *schema,
+ tu,
+ string_literal_map,
+ disabled_w,
+ file_list,
+ unlinks);
}
catch (CXX::Tree::Generator::Failed const&)
{
@@ -717,7 +759,14 @@ main (Int argc, Char* argv[])
try
{
sloc += CXX::Parser::Generator::generate (
- *parser_ops, *schema, tu, true, disabled_w, file_list, unlinks);
+ *parser_ops,
+ *schema,
+ tu,
+ string_literal_map,
+ true,
+ disabled_w,
+ file_list,
+ unlinks);
}
catch (CXX::Parser::Generator::Failed const&)
{
@@ -837,7 +886,13 @@ main (Int argc, Char* argv[])
try
{
sloc += CXX::Tree::Generator::generate (
- *tree_ops, s, path, disabled_w, file_list, unlinks);
+ *tree_ops,
+ s,
+ path,
+ string_literal_map,
+ disabled_w,
+ file_list,
+ unlinks);
}
catch (CXX::Tree::Generator::Failed const&)
{
@@ -853,7 +908,14 @@ main (Int argc, Char* argv[])
// Only generate driver for the first schema.
//
sloc += CXX::Parser::Generator::generate (
- *parser_ops, s, path, i == b, disabled_w, file_list, unlinks);
+ *parser_ops,
+ s,
+ path,
+ string_literal_map,
+ i == b,
+ disabled_w,
+ file_list,
+ unlinks);
}
catch (CXX::Parser::Generator::Failed const&)
{