summaryrefslogtreecommitdiff
path: root/xsd/cxx
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2009-12-08 16:18:01 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2009-12-08 16:18:01 +0200
commit1ca6396a3dd284241de11bcaa210ad5836e8e5a8 (patch)
tree465c19f0d668a91bb556d748911847acfb80cb09 /xsd/cxx
parentd71611d5fb575078bdf573c35257bb86bb7054e0 (diff)
Multiple object model character encodings support
Also add support for ISO-8859-1.
Diffstat (limited to 'xsd/cxx')
-rw-r--r--xsd/cxx/elements.cxx275
-rw-r--r--xsd/cxx/elements.hxx39
-rw-r--r--xsd/cxx/literal-map.cxx296
-rw-r--r--xsd/cxx/literal-map.hxx23
-rw-r--r--xsd/cxx/parser/cli.hxx2
-rw-r--r--xsd/cxx/parser/elements.cxx3
-rw-r--r--xsd/cxx/parser/elements.hxx1
-rw-r--r--xsd/cxx/parser/generator.cxx82
-rw-r--r--xsd/cxx/parser/generator.hxx2
-rw-r--r--xsd/cxx/parser/name-processor.cxx16
-rw-r--r--xsd/cxx/parser/name-processor.hxx6
-rw-r--r--xsd/cxx/parser/parser-header.cxx7
-rw-r--r--xsd/cxx/parser/validator.cxx17
-rw-r--r--xsd/cxx/tree/cli.hxx2
-rw-r--r--xsd/cxx/tree/counter.cxx2
-rw-r--r--xsd/cxx/tree/elements.cxx3
-rw-r--r--xsd/cxx/tree/elements.hxx1
-rw-r--r--xsd/cxx/tree/generator.cxx73
-rw-r--r--xsd/cxx/tree/generator.hxx2
-rw-r--r--xsd/cxx/tree/name-processor.cxx15
-rw-r--r--xsd/cxx/tree/name-processor.hxx9
-rw-r--r--xsd/cxx/tree/tree-forward.cxx7
-rw-r--r--xsd/cxx/tree/tree-header.cxx14
-rw-r--r--xsd/cxx/tree/validator.cxx1
24 files changed, 847 insertions, 51 deletions
diff --git a/xsd/cxx/elements.cxx b/xsd/cxx/elements.cxx
index fd23fc0..764d7da 100644
--- a/xsd/cxx/elements.cxx
+++ b/xsd/cxx/elements.cxx
@@ -8,7 +8,9 @@
#include <backend-elements/regex.hxx>
#include <cctype> // std::toupper
+#include <memory>
#include <sstream>
+#include <fstream>
#include <iostream>
using std::wcerr;
@@ -111,7 +113,9 @@ namespace CXX
Context::
Context (std::wostream& o,
SemanticGraph::Schema& root,
+ StringLiteralMap const* string_literal_map_,
NarrowString const& char_type__,
+ NarrowString const& char_encoding__,
Boolean include_with_brackets__,
NarrowString const& include_prefix__,
NarrowString const& esymbol,
@@ -125,8 +129,10 @@ namespace CXX
: os (o),
schema_root (root),
char_type (char_type_),
+ char_encoding (char_encoding_),
L (L_),
string_type (string_type_),
+ string_literal_map (string_literal_map_),
include_with_brackets (include_with_brackets_),
include_prefix (include_prefix_),
type_exp (type_exp_),
@@ -135,6 +141,7 @@ namespace CXX
ns_mapping_cache (ns_mapping_cache_),
xs_ns_ (0),
char_type_ (char_type__),
+ char_encoding_ (char_encoding__),
L_ (char_type == L"wchar_t" ? L"L" : L""),
include_with_brackets_ (include_with_brackets__),
include_prefix_ (include_prefix__),
@@ -177,7 +184,7 @@ namespace CXX
xs_ns_ = dynamic_cast<SemanticGraph::Namespace*> (n);
}
- //
+ // String type.
//
if (char_type == L"char")
string_type_ = L"::std::string";
@@ -186,6 +193,16 @@ namespace CXX
else
string_type_ = L"::std::basic_string< " + char_type + L" >";
+ // Default encoding.
+ //
+ if (!char_encoding)
+ {
+ if (char_type == L"char")
+ char_encoding = L"utf8";
+ else
+ char_encoding = L"auto";
+ }
+
// Default mapping.
//
nsr_mapping_.push_back (
@@ -615,6 +632,121 @@ namespace CXX
return r;
}
+ String
+ strlit_ascii (String const& str)
+ {
+ // Render str as a double-quoted C++ string literal that contains only
+ // 7-bit ASCII. Throws UnrepresentableCharacter for any character above
+ // 127.
+ //
+ Size len (str.size ());
+
+ String lit;
+
+ // Usually one output character per input character plus the quotes.
+ //
+ lit.reserve (len + 2);
+ lit += '"';
+
+ // True after a charlit() escape has been written and until the next
+ // printable character has been handled.
+ //
+ Boolean after_escape (false);
+
+ for (Size pos (0); pos < len; ++pos)
+ {
+ UnsignedLong cp (Context::unicode_char (str, pos)); // May advance pos.
+
+ // [128 - ] cannot be expressed in ASCII.
+ //
+ if (cp > 127)
+ throw UnrepresentableCharacter (str, pos + 1);
+
+ // [0 - 31] and 127: named escape if there is one, charlit() otherwise.
+ //
+ if (cp < 32 || cp == 127)
+ {
+ if (cp == L'\n')
+ lit += L"\\n";
+ else if (cp == L'\t')
+ lit += L"\\t";
+ else if (cp == L'\v')
+ lit += L"\\v";
+ else if (cp == L'\b')
+ lit += L"\\b";
+ else if (cp == L'\r')
+ lit += L"\\r";
+ else if (cp == L'\f')
+ lit += L"\\f";
+ else if (cp == L'\a')
+ lit += L"\\a";
+ else
+ {
+ lit += charlit (cp);
+ after_escape = true;
+ }
+
+ continue;
+ }
+
+ // [32 - 126]: written as is, except for the quote and the backslash.
+ //
+ if (after_escape)
+ {
+ // Close and reopen the literal so this character cannot clash with
+ // the escape sequence in front of it.
+ //
+ lit += L"\"\"";
+ after_escape = false;
+ }
+
+ if (cp == L'"')
+ lit += L"\\\"";
+ else if (cp == L'\\')
+ lit += L"\\\\";
+ else
+ lit += static_cast<WideChar> (cp);
+ }
+
+ lit += '"';
+
+ return lit;
+ }
+
const UnsignedLong utf8_first_char_mask[5] =
{
0x00, 0x00, 0xC0, 0xE0, 0xF0
@@ -770,6 +902,126 @@ namespace CXX
}
String
+ strlit_iso8859_1 (String const& str)
+ {
+ // Render str as a double-quoted C++ string literal for ISO-8859-1.
+ // Throws UnrepresentableCharacter for any character above 255.
+ //
+ Size len (str.size ());
+
+ String lit;
+
+ // Usually one output character per input character plus the quotes.
+ //
+ lit.reserve (len + 2);
+ lit += '"';
+
+ // True after a charlit() escape has been written and until the next
+ // printable character has been handled.
+ //
+ Boolean after_escape (false);
+
+ for (Size pos (0); pos < len; ++pos)
+ {
+ UnsignedLong cp (Context::unicode_char (str, pos)); // May advance pos.
+
+ // [256 - ] cannot be expressed in ISO-8859-1.
+ //
+ if (cp >= 256)
+ throw UnrepresentableCharacter (str, pos + 1);
+
+ // [127 - 255]: always written through charlit().
+ //
+ if (cp >= 127)
+ {
+ lit += charlit (cp);
+ after_escape = true;
+ continue;
+ }
+
+ // [0 - 31]: named escape if there is one, charlit() otherwise.
+ //
+ if (cp < 32)
+ {
+ if (cp == L'\n')
+ lit += L"\\n";
+ else if (cp == L'\t')
+ lit += L"\\t";
+ else if (cp == L'\v')
+ lit += L"\\v";
+ else if (cp == L'\b')
+ lit += L"\\b";
+ else if (cp == L'\r')
+ lit += L"\\r";
+ else if (cp == L'\f')
+ lit += L"\\f";
+ else if (cp == L'\a')
+ lit += L"\\a";
+ else
+ {
+ lit += charlit (cp);
+ after_escape = true;
+ }
+
+ continue;
+ }
+
+ // [32 - 126]: written as is, except for the quote and the backslash.
+ //
+ if (after_escape)
+ {
+ // Close and reopen the literal so this character cannot clash with
+ // the escape sequence in front of it.
+ //
+ lit += L"\"\"";
+ after_escape = false;
+ }
+
+ if (cp == L'"')
+ lit += L"\\\"";
+ else if (cp == L'\\')
+ lit += L"\\\\";
+ else
+ lit += static_cast<WideChar> (cp);
+ }
+
+ lit += '"';
+
+ return lit;
+ }
+
+ String
strlit_utf32 (String const& str)
{
String r;
@@ -886,8 +1138,27 @@ namespace CXX
String Context::
strlit (String const& str)
{
+ // Return the C++ string literal for str: a custom mapping if one is
+ // registered for exactly this string, otherwise a literal produced
+ // according to char_type and char_encoding.
+ //
+ // The map is optional. Some contexts are constructed with a null map,
+ // so skip the lookup in that case instead of dereferencing the pointer.
+ //
+ if (string_literal_map != 0)
+ {
+ StringLiteralMap::ConstIterator i (string_literal_map->find (str));
+
+ if (i != string_literal_map->end ())
+ return i->second;
+ }
+
if (char_type == L"char")
- return strlit_utf8 (str);
+ {
+ if (char_encoding == L"utf8")
+ return strlit_utf8 (str);
+ else if (char_encoding == L"iso8859-1")
+ return strlit_iso8859_1 (str);
+ else
+ {
+ // For LCP, custom, and other unknown encodings, use ASCII.
+ //
+ return strlit_ascii (str);
+ }
+ }
else
return strlit_utf32 (str);
}
diff --git a/xsd/cxx/elements.hxx b/xsd/cxx/elements.hxx
index 39eee77..3bbacd0 100644
--- a/xsd/cxx/elements.hxx
+++ b/xsd/cxx/elements.hxx
@@ -6,6 +6,8 @@
#ifndef CXX_ELEMENTS_HXX
#define CXX_ELEMENTS_HXX
+#include <ostream>
+
#include <cult/types.hxx>
#include <cult/containers/set.hxx>
#include <cult/containers/map.hxx>
@@ -17,8 +19,7 @@
#include <xsd-frontend/traversal.hxx>
#include <elements.hxx>
-
-#include <ostream>
+#include <cxx/literal-map.hxx>
namespace CXX
{
@@ -36,6 +37,30 @@ namespace CXX
// Exceptions.
//
+ // Exception that carries a string together with the position of the
+ // character in it that could not be represented.
+ //
+ struct UnrepresentableCharacter
+ {
+ UnrepresentableCharacter (String const& s, Size p)
+ : str_ (s), pos_ (p)
+ {
+ }
+
+ // Position that was passed to the constructor.
+ //
+ Size
+ position () const
+ {
+ return pos_;
+ }
+
+ // Copy of the string that was passed to the constructor.
+ //
+ String const&
+ string () const
+ {
+ return str_;
+ }
+
+ private:
+ String str_;
+ Size pos_;
+ };
+
struct NoNamespaceMapping
{
NoNamespaceMapping (SemanticGraph::Path const& file,
@@ -106,7 +131,6 @@ namespace CXX
String reason_;
};
-
//
//
class Context
@@ -124,7 +148,9 @@ namespace CXX
public:
Context (std::wostream& o,
SemanticGraph::Schema& root,
+ StringLiteralMap const* custom_literals_map,
NarrowString const& char_type__,
+ NarrowString const& char_encoding__,
Boolean include_with_brackets__,
NarrowString const& include_prefix__,
NarrowString const& esymbol,
@@ -141,8 +167,10 @@ namespace CXX
: os (c.os),
schema_root (c.schema_root),
char_type (c.char_type),
+ char_encoding (c.char_encoding),
L (c.L),
string_type (c.string_type),
+ string_literal_map (c.string_literal_map),
include_with_brackets (c.include_with_brackets),
include_prefix (c.include_prefix),
type_exp (c.type_exp),
@@ -166,8 +194,10 @@ namespace CXX
: os (o),
schema_root (c.schema_root),
char_type (c.char_type),
+ char_encoding (c.char_encoding),
L (c.L),
string_type (c.string_type),
+ string_literal_map (c.string_literal_map),
include_with_brackets (c.include_with_brackets),
include_prefix (c.include_prefix),
type_exp (c.type_exp),
@@ -309,8 +339,10 @@ namespace CXX
SemanticGraph::Schema& schema_root;
String& char_type;
+ String& char_encoding;
String& L; // string literal prefix
String& string_type;
+ StringLiteralMap const* string_literal_map;
Boolean& include_with_brackets;
String& include_prefix;
@@ -326,6 +358,7 @@ namespace CXX
SemanticGraph::Namespace* xs_ns_;
String char_type_;
+ String char_encoding_;
String L_;
String string_type_;
diff --git a/xsd/cxx/literal-map.cxx b/xsd/cxx/literal-map.cxx
new file mode 100644
index 0000000..f3f7ee0
--- /dev/null
+++ b/xsd/cxx/literal-map.cxx
@@ -0,0 +1,296 @@
+// file : xsd/cxx/literal-map.cxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#include <memory> // std::auto_ptr
+#include <cstddef> // std::size_t
+#include <fstream>
+#include <iostream>
+
+#include <xercesc/util/XMLUni.hpp>
+#include <xercesc/util/XercesVersion.hpp>
+
+#include <xercesc/framework/LocalFileInputSource.hpp>
+
+#include <xercesc/sax/Locator.hpp>
+#include <xercesc/sax/SAXParseException.hpp>
+#include <xercesc/sax2/DefaultHandler.hpp>
+#include <xercesc/sax2/SAX2XMLReader.hpp>
+#include <xercesc/sax2/XMLReaderFactory.hpp>
+
+#include <xsd-frontend/xml.hxx>
+
+#include <cxx/literal-map.hxx>
+
+using namespace std;
+using namespace xercesc;
+namespace XML = XSDFrontend::XML;
+
+namespace CXX
+{
+ // SAX2 handler that fills a StringLiteralMap from a document of the form
+ //
+ // <string-literal-map>
+ // <entry><string>...</string><literal>...</literal></entry>
+ // ...
+ // </string-literal-map>
+ //
+ // Only local element names are compared. Inside an entry the 'string'
+ // and 'literal' elements may come in either order but both must be
+ // present. Errors are printed to wcerr and signalled by throwing Failed;
+ // parser warnings are only printed.
+ //
+ class Handler: public DefaultHandler
+ {
+ public:
+ struct Failed {};
+
+ // Every member is initialized here, in declaration order. In
+ // particular locator_ must start out null because line () and col ()
+ // test it to find out whether a locator has been supplied.
+ //
+ Handler (String const& file, StringLiteralMap& map)
+ : locator_ (0),
+ state_ (s_init),
+ file_ (file),
+ map_ (map),
+ str_seen_ (false),
+ lit_seen_ (false)
+ {
+ }
+
+ // Remember the locator so that diagnostics can carry line and column.
+ //
+ virtual void
+ setDocumentLocator (const Locator* const l)
+ {
+ locator_ = l;
+ }
+
+ // Advance the state machine on an opening tag. An element that is not
+ // valid in the current state is an error.
+ //
+ virtual Void
+ startElement (const XMLCh* const,
+ const XMLCh* const lname,
+ const XMLCh* const,
+ const xercesc::Attributes&)
+ {
+ String n (XML::transcode (lname));
+
+ if (n == L"string-literal-map" && state_ == s_init)
+ state_ = s_map;
+ else if (n == L"entry" && state_ == s_map)
+ {
+ str_seen_ = false;
+ lit_seen_ = false;
+ state_ = s_entry;
+ }
+ else if (n == L"string" && state_ == s_entry)
+ {
+ str_seen_ = true;
+ str_.clear ();
+ state_ = s_string;
+ }
+ else if (n == L"literal" && state_ == s_entry)
+ {
+ lit_seen_ = true;
+ lit_.clear ();
+ state_ = s_literal;
+ }
+ else
+ {
+ wcerr << file_ << ":" << line () << ":" << col () << ": error: "
+ << "unexpected element '" << n << "'" << endl;
+ throw Failed ();
+ }
+ }
+
+ // Step back to the parent state on a closing tag. Closing an entry
+ // stores the collected pair in the map, replacing any earlier mapping
+ // for the same string.
+ //
+ virtual Void
+ endElement (const XMLCh* const,
+ const XMLCh* const lname,
+ const XMLCh* const)
+ {
+ String n (XML::transcode (lname));
+
+ if (n == L"string-literal-map")
+ state_ = s_init;
+ else if (n == L"entry")
+ {
+ if (!str_seen_)
+ {
+ wcerr << file_ << ":" << line () << ":" << col () << ": error: "
+ << "expected 'string' element" << endl;
+ throw Failed ();
+ }
+
+ if (!lit_seen_)
+ {
+ wcerr << file_ << ":" << line () << ":" << col () << ": error: "
+ << "expected 'literal' element" << endl;
+ throw Failed ();
+ }
+
+ map_[str_] = lit_;
+ state_ = s_map;
+ }
+ else if (n == L"string")
+ state_ = s_entry;
+ else if (n == L"literal")
+ state_ = s_entry;
+ }
+
+ // Accumulate character data for the current 'string' or 'literal'
+ // element. Anywhere else only space, LF, CR and tab are accepted.
+ //
+#if _XERCES_VERSION >= 30000
+ virtual Void
+ characters (const XMLCh* const s, const XMLSize_t length)
+#else
+ virtual Void
+ characters (const XMLCh* const s, const unsigned int length)
+#endif
+ {
+ String str (XML::transcode (s, length));
+
+ if (state_ == s_string)
+ str_ += str;
+ else if (state_ == s_literal)
+ lit_ += str;
+ else
+ {
+ for (Size i (0); i < str.size (); ++i)
+ {
+ WideChar c (str[i]);
+
+ if (c != 0x20 && c != 0x0A && c != 0x0D && c != 0x09)
+ {
+ wcerr << file_ << ":" << line () << ":" << col () << ": error: "
+ << "unexpected character data" << endl;
+ throw Failed ();
+ }
+ }
+ }
+ }
+
+ // Error handling.
+ //
+ enum Severity {s_warning, s_error, s_fatal};
+
+ virtual Void
+ warning (const SAXParseException& e)
+ {
+ handle (e, s_warning);
+ }
+
+ virtual Void
+ error (const SAXParseException& e)
+ {
+ handle (e, s_error);
+ }
+
+ virtual Void
+ fatalError (const SAXParseException& e)
+ {
+ handle (e, s_fatal);
+ }
+
+ virtual Void
+ resetErrors ()
+ {
+ }
+
+ // Print a parser diagnostic as file:line:column and throw Failed for
+ // anything more severe than a warning.
+ //
+ Void
+ handle (const SAXParseException& e, Severity s)
+ {
+ wcerr << file_ << ":";
+
+#if _XERCES_VERSION >= 30000
+ wcerr << e.getLineNumber () << ":" << e.getColumnNumber () << ": ";
+#else
+ XMLSSize_t l (e.getLineNumber ());
+ XMLSSize_t c (e.getColumnNumber ());
+ wcerr << (l == -1 ? 0 : l) << ":" << (c == -1 ? 0 : c) << ": ";
+#endif
+
+ String msg (XML::transcode (e.getMessage ()));
+ wcerr << (s == s_warning ? "warning: " : "error: ") << msg << endl;
+
+ if (s != s_warning)
+ throw Failed ();
+ }
+
+ // Current line according to the locator, or 0 if there is no locator
+ // (or, before Xerces 3, if the locator reports -1).
+ //
+ size_t
+ line () const
+ {
+ size_t r (0);
+
+ if (locator_ != 0)
+ {
+#if _XERCES_VERSION >= 30000
+ r = static_cast<size_t> (locator_->getLineNumber ());
+#else
+ XMLSSize_t l (locator_->getLineNumber ());
+ r = l == -1 ? 0 : static_cast<size_t> (l);
+#endif
+ }
+
+ return r;
+ }
+
+ // Current column according to the locator, or 0 if there is no locator
+ // (or, before Xerces 3, if the locator reports -1).
+ //
+ size_t
+ col () const
+ {
+ size_t r (0);
+
+ if (locator_ != 0)
+ {
+#if _XERCES_VERSION >= 30000
+ r = static_cast<size_t> (locator_->getColumnNumber ());
+#else
+ XMLSSize_t c (locator_->getColumnNumber ());
+ r = c == -1 ? 0 : static_cast<size_t> (c);
+#endif
+ }
+
+ return r;
+ }
+
+ private:
+ const Locator* locator_;
+
+ enum
+ {
+ s_init,
+ s_map,
+ s_entry,
+ s_string,
+ s_literal
+ } state_;
+
+ String file_;
+ StringLiteralMap& map_;
+
+ Boolean str_seen_;
+ Boolean lit_seen_;
+
+ String str_;
+ String lit_;
+ };
+
+ bool
+ read_literal_map (NarrowString const& file, StringLiteralMap& map)
+ {
+ // Load the string literal map stored in file into map. Returns false,
+ // after printing a diagnostic to wcerr, if the file cannot be opened
+ // for reading or if the handler reports a failure.
+ //
+
+ // Probe the file with an fstream first. This way an unreadable file is
+ // reported in a consistent manner. The temporary stream is gone again
+ // before the parser is created.
+ //
+ if (!ifstream (file.c_str ()).is_open ())
+ {
+ wcerr << file.c_str () << ": unable to open in read mode" << endl;
+ return false;
+ }
+
+ try
+ {
+ String path (file);
+
+ LocalFileInputSource source (XML::XMLChString (path).c_str ());
+ Handler handler (path, map);
+
+ auto_ptr<SAX2XMLReader> reader (
+ XMLReaderFactory::createXMLReader ());
+
+ reader->setContentHandler (&handler);
+ reader->setErrorHandler (&handler);
+
+ // Namespace-aware parsing without DTD or XML Schema validation.
+ //
+ reader->setFeature (XMLUni::fgSAX2CoreNameSpaces, true);
+ reader->setFeature (XMLUni::fgSAX2CoreNameSpacePrefixes, true);
+ reader->setFeature (XMLUni::fgSAX2CoreValidation, false);
+ reader->setFeature (XMLUni::fgXercesSchema, false);
+ reader->setFeature (XMLUni::fgXercesSchemaFullChecking, false);
+
+ reader->parse (source);
+ }
+ catch (Handler::Failed const&)
+ {
+ // The handler has already printed the diagnostic.
+ //
+ return false;
+ }
+
+ return true;
+ }
+}
diff --git a/xsd/cxx/literal-map.hxx b/xsd/cxx/literal-map.hxx
new file mode 100644
index 0000000..1120045
--- /dev/null
+++ b/xsd/cxx/literal-map.hxx
@@ -0,0 +1,23 @@
+// file : xsd/cxx/literal-map.hxx
+// author : Boris Kolpackov <boris@codesynthesis.com>
+// copyright : Copyright (c) 2005-2009 Code Synthesis Tools CC
+// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
+
+#ifndef CXX_LITERAL_MAP_HXX
+#define CXX_LITERAL_MAP_HXX
+
+#include <cult/types.hxx>
+#include <cult/containers/map.hxx>
+
+namespace CXX
+{
+ using namespace Cult;
+ typedef WideString String;
+
+ // Custom mapping from a string to the C++ literal text to be used for
+ // it.
+ //
+ typedef Cult::Containers::Map<String, String> StringLiteralMap;
+
+ // Read string-to-literal mappings from the XML file into map. An
+ // existing mapping for the same string is overwritten. Returns false,
+ // after printing a diagnostic to wcerr, if the file cannot be opened
+ // for reading or is not a valid literal map; entries stored before the
+ // error was detected remain in map.
+ //
+ bool
+ read_literal_map (NarrowString const& file, StringLiteralMap& map);
+}
+
+#endif // CXX_LITERAL_MAP_HXX
diff --git a/xsd/cxx/parser/cli.hxx b/xsd/cxx/parser/cli.hxx
index 504de43..5f31af7 100644
--- a/xsd/cxx/parser/cli.hxx
+++ b/xsd/cxx/parser/cli.hxx
@@ -24,6 +24,7 @@ namespace CXX
typedef Char const Key[];
extern Key type_map;
+ extern Key char_encoding;
extern Key char_type;
extern Key output_dir;
extern Key xml_parser;
@@ -85,6 +86,7 @@ namespace CXX
typedef Cult::CLI::Options<
type_map, Cult::Containers::Vector<NarrowString>,
char_type, NarrowString,
+ char_encoding, NarrowString,
output_dir, NarrowString,
xml_parser, NarrowString,
generate_inline, Boolean,
diff --git a/xsd/cxx/parser/elements.cxx b/xsd/cxx/parser/elements.cxx
index 8a02ffb..09d1008 100644
--- a/xsd/cxx/parser/elements.cxx
+++ b/xsd/cxx/parser/elements.cxx
@@ -42,12 +42,15 @@ namespace CXX
Context (std::wostream& o,
SemanticGraph::Schema& root,
CLI::Options const& ops,
+ StringLiteralMap const* map,
Regex const* he,
Regex const* ie,
Regex const* hie)
: CXX::Context (o,
root,
+ map,
ops.value<CLI::char_type> (),
+ ops.value<CLI::char_encoding> (),
ops.value<CLI::include_with_brackets> (),
ops.value<CLI::include_prefix> (),
ops.value<CLI::export_symbol> (),
diff --git a/xsd/cxx/parser/elements.hxx b/xsd/cxx/parser/elements.hxx
index 90ff84e..61cde69 100644
--- a/xsd/cxx/parser/elements.hxx
+++ b/xsd/cxx/parser/elements.hxx
@@ -39,6 +39,7 @@ namespace CXX
Context (std::wostream&,
SemanticGraph::Schema&,
CLI::Options const&,
+ StringLiteralMap const*,
Regex const* hxx_expr,
Regex const* ixx_expr,
Regex const* hxx_impl_expr);
diff --git a/xsd/cxx/parser/generator.cxx b/xsd/cxx/parser/generator.cxx
index 342e3f2..ec08af4 100644
--- a/xsd/cxx/parser/generator.cxx
+++ b/xsd/cxx/parser/generator.cxx
@@ -126,9 +126,9 @@ namespace CXX
{
namespace CLI
{
- extern Key char_type;
extern Key type_map = "type-map";
extern Key char_type = "char-type";
+ extern Key char_encoding = "char-encoding";
extern Key output_dir = "output-dir";
extern Key xml_parser = "xml-parser";
extern Key generate_inline = "generate-inline";
@@ -206,6 +206,14 @@ namespace CXX
<< " values are 'char' (default) and 'wchar_t'."
<< endl;
+ e << "--char-encoding <enc>" << endl
+ << " Specify the character encoding that should be used\n"
+ << " in the object model. Valid values for the 'char'\n"
+ << " character type are 'utf8' (default), 'iso8859-1',\n"
+ << " 'lcp', and 'custom'. For the 'wchar_t' character\n"
+ << " type the only valid value is 'auto'."
+ << endl;
+
e << "--output-dir <dir>" << endl
<< " Write generated files to <dir> instead of current\n"
<< " directory."
@@ -471,6 +479,11 @@ namespace CXX
// Misc.
//
+ e << "--custom-literals <file>" << endl
+ << " Load custom XML string to C++ literal mappings\n"
+ << " from <file>."
+ << endl;
+
e << "--export-symbol <symbol>" << endl
<< " Export symbol for Win32 DLL export/import control."
<< endl;
@@ -600,6 +613,7 @@ namespace CXX
generate (Parser::CLI::Options const& ops,
Schema& schema,
Path const& file_path,
+ StringLiteralMap const& string_literal_map,
Boolean gen_driver,
const WarningSet& disabled_warnings,
FileList& file_list,
@@ -648,7 +662,7 @@ namespace CXX
//
{
NameProcessor proc;
- proc.process (ops, schema, file_path);
+ proc.process (ops, schema, file_path, string_literal_map);
}
Boolean validation ((ops.value<CLI::xml_parser> () == "expat" ||
@@ -701,7 +715,7 @@ namespace CXX
String xns;
{
- Context ctx (std::wcerr, schema, ops, 0, 0, 0);
+ Context ctx (std::wcerr, schema, ops, 0, 0, 0, 0);
xns = ctx.xs_ns_name ();
}
@@ -1144,7 +1158,13 @@ namespace CXX
// HXX
//
{
- Context ctx (hxx, schema, ops, &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (hxx,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
Indentation::Clip<Indentation::SLOC, WideChar> hxx_sloc (hxx);
@@ -1231,7 +1251,13 @@ namespace CXX
//
if (inline_)
{
- Context ctx (ixx, schema, ops, &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (ixx,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
Indentation::Clip<Indentation::SLOC, WideChar> ixx_sloc (ixx);
@@ -1287,7 +1313,13 @@ namespace CXX
//
if (source)
{
- Context ctx (cxx, schema, ops, &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (cxx,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
Indentation::Clip<Indentation::SLOC, WideChar> cxx_sloc (cxx);
@@ -1351,8 +1383,13 @@ namespace CXX
//
if (impl)
{
- Context ctx (hxx_impl, schema, ops,
- &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (hxx_impl,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
String guard (guard_expr.merge (guard_prefix + hxx_impl_name));
guard = ctx.escape (guard); // Make it a C++ id.
@@ -1380,8 +1417,13 @@ namespace CXX
//
if (impl)
{
- Context ctx (cxx_impl, schema, ops,
- &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (cxx_impl,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
// Set auto-indentation.
//
@@ -1397,8 +1439,13 @@ namespace CXX
//
if (driver)
{
- Context ctx (cxx_driver, schema, ops,
- &hxx_expr, &ixx_expr, &hxx_impl_expr);
+ Context ctx (cxx_driver,
+ schema,
+ ops,
+ &string_literal_map,
+ &hxx_expr,
+ &ixx_expr,
+ &hxx_impl_expr);
// Set auto-indentation.
//
@@ -1412,6 +1459,17 @@ namespace CXX
return sloc;
}
+ catch (UnrepresentableCharacter const& e)
+ {
+ wcerr << "error: character at position " << e.position () << " "
+ << "in string '" << e.string () << "' is unrepresentable in "
+ << "the target encoding" << endl;
+
+ wcerr << "info: use the --custom-literals option to provide custom "
+ << "string literals mapping" << endl;
+
+ throw Failed ();
+ }
catch (NoNamespaceMapping const& e)
{
wcerr << e.file () << ":" << e.line () << ":" << e.column ()
diff --git a/xsd/cxx/parser/generator.hxx b/xsd/cxx/parser/generator.hxx
index aaab3b8..8c5631d 100644
--- a/xsd/cxx/parser/generator.hxx
+++ b/xsd/cxx/parser/generator.hxx
@@ -18,6 +18,7 @@
#include <xsd.hxx>
+#include <cxx/literal-map.hxx>
#include <cxx/parser/cli.hxx>
namespace CXX
@@ -41,6 +42,7 @@ namespace CXX
generate (CLI::Options const& options,
XSDFrontend::SemanticGraph::Schema&,
XSDFrontend::SemanticGraph::Path const& file,
+ StringLiteralMap const&,
Boolean gen_driver,
const WarningSet& disabled_warnings,
FileList& file_list,
diff --git a/xsd/cxx/parser/name-processor.cxx b/xsd/cxx/parser/name-processor.cxx
index e9ba876..5f9209e 100644
--- a/xsd/cxx/parser/name-processor.cxx
+++ b/xsd/cxx/parser/name-processor.cxx
@@ -3,7 +3,6 @@
// copyright : Copyright (c) 2006-2009 Code Synthesis Tools CC
// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
-#include <cxx/elements.hxx>
#include <cxx/parser/name-processor.hxx>
#include <xsd-frontend/semantic-graph.hxx>
@@ -35,10 +34,13 @@ namespace CXX
public:
Context (CLI::Options const& ops,
SemanticGraph::Schema& root,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const* map)
: CXX::Context (std::wcerr,
root,
+ map,
ops.value<CLI::char_type> (),
+ ops.value<CLI::char_encoding> (),
ops.value<CLI::include_with_brackets> (),
ops.value<CLI::include_prefix> (),
ops.value<CLI::export_symbol> (),
@@ -1101,9 +1103,10 @@ namespace CXX
Void
process_impl (CLI::Options const& ops,
SemanticGraph::Schema& tu,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const& map)
{
- Context ctx (ops, tu, file);
+ Context ctx (ops, tu, file, &map);
if (tu.names_begin ()->named ().name () ==
L"http://www.w3.org/2001/XMLSchema")
@@ -1196,9 +1199,10 @@ namespace CXX
Void NameProcessor::
process (CLI::Options const& ops,
SemanticGraph::Schema& tu,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const& map)
{
- process_impl (ops, tu, file);
+ process_impl (ops, tu, file, map);
}
}
}
diff --git a/xsd/cxx/parser/name-processor.hxx b/xsd/cxx/parser/name-processor.hxx
index f7849c8..fee7027 100644
--- a/xsd/cxx/parser/name-processor.hxx
+++ b/xsd/cxx/parser/name-processor.hxx
@@ -6,10 +6,9 @@
#ifndef CXX_PARSER_NAME_PROCESSOR_HXX
#define CXX_PARSER_NAME_PROCESSOR_HXX
-#include <cult/types.hxx>
-
#include <xsd-frontend/semantic-graph.hxx>
+#include <cxx/elements.hxx>
#include <cxx/parser/cli.hxx>
namespace CXX
@@ -26,7 +25,8 @@ namespace CXX
Void
process (CLI::Options const& ops,
XSDFrontend::SemanticGraph::Schema&,
- XSDFrontend::SemanticGraph::Path const& file);
+ XSDFrontend::SemanticGraph::Path const& file,
+ StringLiteralMap const& map);
};
}
}
diff --git a/xsd/cxx/parser/parser-header.cxx b/xsd/cxx/parser/parser-header.cxx
index 878a891..8ecd898 100644
--- a/xsd/cxx/parser/parser-header.cxx
+++ b/xsd/cxx/parser/parser-header.cxx
@@ -1324,6 +1324,13 @@ namespace CXX
}
else
{
+ if (ctx.char_type == L"char" &&
+ ctx.xml_parser == L"xerces" &&
+ ctx.char_encoding != L"custom")
+ {
+ ctx.os << "#include <xsd/cxx/xml/char-" << ctx.char_encoding << ".hxx>" << endl;
+ }
+
ctx.os << "#include <xsd/cxx/xml/error-handler.hxx>" << endl
<< "#include <xsd/cxx/parser/exceptions.hxx>" << endl
<< "#include <xsd/cxx/parser/elements.hxx>" << endl
diff --git a/xsd/cxx/parser/validator.cxx b/xsd/cxx/parser/validator.cxx
index 526c941..9b5d967 100644
--- a/xsd/cxx/parser/validator.cxx
+++ b/xsd/cxx/parser/validator.cxx
@@ -27,7 +27,7 @@ namespace CXX
CLI::Options const& options,
const WarningSet& disabled_warnings,
Boolean& valid_)
- : Context (std::wcerr, root, options, 0, 0, 0),
+ : Context (std::wcerr, root, options, 0, 0, 0, 0),
disabled_warnings_ (disabled_warnings),
disabled_warnings_all_ (false),
valid (valid_),
@@ -584,7 +584,20 @@ namespace CXX
if (options.value<CLI::xml_parser> () == "expat" &&
options.value<CLI::char_type> () == "wchar_t")
{
- wcerr << "error: using expat with wchar_t is not yet supported"
+ wcerr << "error: using expat with wchar_t is not supported"
+ << endl;
+
+ return false;
+ }
+
+ //
+ //
+ if (options.value<CLI::xml_parser> () == "expat" &&
+ !options.value<CLI::char_encoding> ().empty () &&
+ options.value<CLI::char_encoding> () != "utf8")
+ {
+ wcerr << "error: using expat with character encoding other than "
+ << "utf8 is not supported"
<< endl;
return false;
diff --git a/xsd/cxx/tree/cli.hxx b/xsd/cxx/tree/cli.hxx
index 9ccf405..c9078e7 100644
--- a/xsd/cxx/tree/cli.hxx
+++ b/xsd/cxx/tree/cli.hxx
@@ -24,6 +24,7 @@ namespace CXX
typedef Char const Key[];
extern Key char_type;
+ extern Key char_encoding;
extern Key output_dir;
extern Key generate_polymorphic;
extern Key generate_serialization;
@@ -119,6 +120,7 @@ namespace CXX
typedef Cult::CLI::Options<
char_type, NarrowString,
+ char_encoding, NarrowString,
output_dir, NarrowString,
generate_polymorphic, Boolean,
generate_serialization, Boolean,
diff --git a/xsd/cxx/tree/counter.cxx b/xsd/cxx/tree/counter.cxx
index d8223bb..a9649b5 100644
--- a/xsd/cxx/tree/counter.cxx
+++ b/xsd/cxx/tree/counter.cxx
@@ -239,7 +239,7 @@ namespace CXX
count (CLI::Options const& options, SemanticGraph::Schema& tu)
{
Counts counts;
- Context ctx (std::wcerr, tu, options, counts, false, 0, 0, 0);
+ Context ctx (std::wcerr, tu, options, counts, false, 0, 0, 0, 0);
Traversal::Schema schema;
Traversal::Sources sources;
diff --git a/xsd/cxx/tree/elements.cxx b/xsd/cxx/tree/elements.cxx
index db1d858..444caa4 100644
--- a/xsd/cxx/tree/elements.cxx
+++ b/xsd/cxx/tree/elements.cxx
@@ -39,12 +39,15 @@ namespace CXX
CLI::Options const& ops,
Counts const& counts_,
Boolean generate_xml_schema__,
+ StringLiteralMap const* map,
Regex const* fe,
Regex const* he,
Regex const* ie)
: CXX::Context (o,
root,
+ map,
ops.value<CLI::char_type> (),
+ ops.value<CLI::char_encoding> (),
ops.value<CLI::include_with_brackets> (),
ops.value<CLI::include_prefix> (),
ops.value<CLI::export_symbol> (),
diff --git a/xsd/cxx/tree/elements.hxx b/xsd/cxx/tree/elements.hxx
index 602291d..a0cb1d9 100644
--- a/xsd/cxx/tree/elements.hxx
+++ b/xsd/cxx/tree/elements.hxx
@@ -117,6 +117,7 @@ namespace CXX
CLI::Options const& ops,
Counts const& counts_,
Boolean generate_xml_schema,
+ StringLiteralMap const*,
Regex const* fwd_expr,
Regex const* hxx_expr,
Regex const* ixx_expr);
diff --git a/xsd/cxx/tree/generator.cxx b/xsd/cxx/tree/generator.cxx
index f9b055e..b81504c 100644
--- a/xsd/cxx/tree/generator.cxx
+++ b/xsd/cxx/tree/generator.cxx
@@ -116,6 +116,7 @@ namespace CXX
namespace CLI
{
extern Key char_type = "char-type";
+ extern Key char_encoding = "char-encoding";
extern Key output_dir = "output-dir";
extern Key generate_polymorphic = "generate-polymorphic";
extern Key generate_serialization = "generate-serialization";
@@ -220,12 +221,19 @@ namespace CXX
<< " values are 'char' (default) and 'wchar_t'."
<< endl;
+ e << "--char-encoding <enc>" << endl
+ << " Specify the character encoding that should be used\n"
+ << " in the object model. Valid values for the 'char'\n"
+ << " character type are 'utf8' (default), 'iso8859-1',\n"
+ << " 'lcp', and 'custom'. For the 'wchar_t' character\n"
+ << " type the only valid value is 'auto'."
+ << endl;
+
e << "--output-dir <dir>" << endl
<< " Write generated files to <dir> instead of current\n"
<< " directory."
<< endl;
-
e << "--generate-polymorphic" << endl
<< " Generate polymorphism-aware code. Specify this\n"
<< " option if you use substitution groups or xsi:type."
@@ -670,6 +678,11 @@ namespace CXX
<< " separate the file name from the part number."
<< endl;
+ e << "--custom-literals <file>" << endl
+ << " Load custom XML string to C++ literal mappings\n"
+ << " from <file>."
+ << endl;
+
e << "--export-symbol <symbol>" << endl
<< " Export symbol for Win32 DLL export/import control."
<< endl;
@@ -803,6 +816,7 @@ namespace CXX
generate (Tree::CLI::Options const& ops,
Schema& schema,
Path const& file_path,
+ StringLiteralMap const& string_literal_map,
const WarningSet& disabled_warnings,
FileList& file_list,
AutoUnlinks& unlinks)
@@ -860,7 +874,7 @@ namespace CXX
//
{
NameProcessor proc;
- if (!proc.process (ops, schema, file_path))
+ if (!proc.process (ops, schema, file_path, string_literal_map))
throw Failed ();
}
@@ -1179,8 +1193,15 @@ namespace CXX
//
if (forward)
{
- Context ctx (fwd, schema, ops, counts, generate_xml_schema,
- &fwd_expr, &hxx_expr, &ixx_expr);
+ Context ctx (fwd,
+ schema,
+ ops,
+ counts,
+ generate_xml_schema,
+ &string_literal_map,
+ &fwd_expr,
+ &hxx_expr,
+ &ixx_expr);
Indentation::Clip<Indentation::SLOC, WideChar> fwd_sloc (fwd);
@@ -1287,8 +1308,15 @@ namespace CXX
// HXX
//
{
- Context ctx (hxx, schema, ops, counts, generate_xml_schema,
- &fwd_expr, &hxx_expr, &ixx_expr);
+ Context ctx (hxx,
+ schema,
+ ops,
+ counts,
+ generate_xml_schema,
+ &string_literal_map,
+ &fwd_expr,
+ &hxx_expr,
+ &ixx_expr);
Indentation::Clip<Indentation::SLOC, WideChar> hxx_sloc (hxx);
@@ -1434,8 +1462,15 @@ namespace CXX
//
if (inline_)
{
- Context ctx (ixx, schema, ops, counts, generate_xml_schema,
- &fwd_expr, &hxx_expr, &ixx_expr);
+ Context ctx (ixx,
+ schema,
+ ops,
+ counts,
+ generate_xml_schema,
+ &string_literal_map,
+ &fwd_expr,
+ &hxx_expr,
+ &ixx_expr);
Indentation::Clip<Indentation::SLOC, WideChar> ixx_sloc (ixx);
@@ -1560,8 +1595,15 @@ namespace CXX
WideOutputFileStream& os (*cxx[part]);
- Context ctx (os, schema, ops, counts, generate_xml_schema,
- &fwd_expr, &hxx_expr, &ixx_expr);
+ Context ctx (os,
+ schema,
+ ops,
+ counts,
+ generate_xml_schema,
+ &string_literal_map,
+ &fwd_expr,
+ &hxx_expr,
+ &ixx_expr);
Indentation::Clip<Indentation::SLOC, WideChar> cxx_sloc (os);
@@ -1644,6 +1686,17 @@ namespace CXX
return sloc;
}
+ catch (UnrepresentableCharacter const& e)
+ {
+ wcerr << "error: character at position " << e.position () << " "
+ << "in string '" << e.string () << "' is unrepresentable in "
+ << "the target encoding" << endl;
+
+ wcerr << "info: use the --custom-literals option to provide custom "
+ << "string literals mapping" << endl;
+
+ throw Failed ();
+ }
catch (NoNamespaceMapping const& e)
{
wcerr << e.file () << ":" << e.line () << ":" << e.column ()
diff --git a/xsd/cxx/tree/generator.hxx b/xsd/cxx/tree/generator.hxx
index 1aa3c60..a66ede0 100644
--- a/xsd/cxx/tree/generator.hxx
+++ b/xsd/cxx/tree/generator.hxx
@@ -13,6 +13,7 @@
#include <xsd.hxx>
+#include <cxx/literal-map.hxx>
#include <cxx/tree/cli.hxx>
namespace CXX
@@ -36,6 +37,7 @@ namespace CXX
generate (CLI::Options const& options,
XSDFrontend::SemanticGraph::Schema&,
XSDFrontend::SemanticGraph::Path const& file,
+ StringLiteralMap const&,
const WarningSet& disabled_warnings,
FileList& file_list,
AutoUnlinks& unlinks);
diff --git a/xsd/cxx/tree/name-processor.cxx b/xsd/cxx/tree/name-processor.cxx
index 53027af..e15b072 100644
--- a/xsd/cxx/tree/name-processor.cxx
+++ b/xsd/cxx/tree/name-processor.cxx
@@ -4,7 +4,6 @@
// license : GNU GPL v2 + exceptions; see accompanying LICENSE file
#include <cxx/tree/name-processor.hxx>
-#include <cxx/tree/elements.hxx>
#include <backend-elements/regex.hxx>
@@ -43,12 +42,14 @@ namespace CXX
Counts const& counts,
Boolean generate_xml_schema,
SemanticGraph::Schema& root,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const& map)
: Tree::Context (std::wcerr,
root,
options,
counts,
generate_xml_schema,
+ &map,
0,
0,
0),
@@ -1970,12 +1971,13 @@ namespace CXX
Boolean
process_impl (CLI::Options const& ops,
SemanticGraph::Schema& tu,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const& map)
{
try
{
Counts counts;
- Context ctx (ops, counts, false, tu, file);
+ Context ctx (ops, counts, false, tu, file, map);
if (tu.names_begin ()->named ().name () ==
L"http://www.w3.org/2001/XMLSchema")
@@ -2096,9 +2098,10 @@ namespace CXX
Boolean NameProcessor::
process (CLI::Options const& ops,
SemanticGraph::Schema& tu,
- SemanticGraph::Path const& file)
+ SemanticGraph::Path const& file,
+ StringLiteralMap const& map)
{
- return process_impl (ops, tu, file);
+ return process_impl (ops, tu, file, map);
}
}
}
diff --git a/xsd/cxx/tree/name-processor.hxx b/xsd/cxx/tree/name-processor.hxx
index 9b8eac9..18c3b82 100644
--- a/xsd/cxx/tree/name-processor.hxx
+++ b/xsd/cxx/tree/name-processor.hxx
@@ -6,11 +6,7 @@
#ifndef CXX_TREE_NAME_PROCESSOR_HXX
#define CXX_TREE_NAME_PROCESSOR_HXX
-#include <cult/types.hxx>
-
-#include <xsd-frontend/semantic-graph.hxx>
-
-#include <cxx/tree/cli.hxx>
+#include <cxx/tree/elements.hxx>
namespace CXX
{
@@ -26,7 +22,8 @@ namespace CXX
Boolean
process (CLI::Options const&,
XSDFrontend::SemanticGraph::Schema&,
- XSDFrontend::SemanticGraph::Path const& file);
+ XSDFrontend::SemanticGraph::Path const& file,
+ StringLiteralMap const&);
};
}
}
diff --git a/xsd/cxx/tree/tree-forward.cxx b/xsd/cxx/tree/tree-forward.cxx
index cceedb7..02c4317 100644
--- a/xsd/cxx/tree/tree-forward.cxx
+++ b/xsd/cxx/tree/tree-forward.cxx
@@ -152,6 +152,13 @@ namespace CXX
}
else
{
+ if (ctx.char_type == L"char" && ctx.char_encoding != L"custom")
+ {
+ ctx.os << "#include <xsd/cxx/xml/char-" << ctx.char_encoding <<
+ ".hxx>" << endl
+ << endl;
+ }
+
ctx.os << "#include <xsd/cxx/tree/exceptions.hxx>" << endl
<< "#include <xsd/cxx/tree/elements.hxx>" << endl
<< "#include <xsd/cxx/tree/types.hxx>" << endl
diff --git a/xsd/cxx/tree/tree-header.cxx b/xsd/cxx/tree/tree-header.cxx
index 7bb630c..9b39739 100644
--- a/xsd/cxx/tree/tree-header.cxx
+++ b/xsd/cxx/tree/tree-header.cxx
@@ -3539,6 +3539,13 @@ namespace CXX
{
if (ctx.generate_xml_schema)
{
+ if (ctx.char_type == L"char" && ctx.char_encoding != L"custom")
+ {
+ ctx.os << "#include <xsd/cxx/xml/char-" << ctx.char_encoding <<
+ ".hxx>" << endl
+ << endl;
+ }
+
ctx.os << "#include <xsd/cxx/tree/exceptions.hxx>" << endl
<< "#include <xsd/cxx/tree/elements.hxx>" << endl
<< "#include <xsd/cxx/tree/types.hxx>" << endl
@@ -3671,6 +3678,13 @@ namespace CXX
<< "#include <algorithm> // std::binary_search" << endl
<< endl;
+ if (ctx.char_type == L"char" && ctx.char_encoding != L"custom")
+ {
+ ctx.os << "#include <xsd/cxx/xml/char-" << ctx.char_encoding <<
+ ".hxx>" << endl
+ << endl;
+ }
+
ctx.os << "#include <xsd/cxx/tree/exceptions.hxx>" << endl
<< "#include <xsd/cxx/tree/elements.hxx>" << endl
<< "#include <xsd/cxx/tree/containers.hxx>" << endl
diff --git a/xsd/cxx/tree/validator.cxx b/xsd/cxx/tree/validator.cxx
index 7ef23fa..5742e7a 100644
--- a/xsd/cxx/tree/validator.cxx
+++ b/xsd/cxx/tree/validator.cxx
@@ -38,6 +38,7 @@ namespace CXX
generate_xml_schema,
0,
0,
+ 0,
0),
disabled_warnings_ (disabled_warnings),
disabled_warnings_all_ (false),